# Alias to silence package startup chatter at load time.
shhh <- suppressPackageStartupMessages # It's a library, so shhh!
shhh(library(mgcv))
shhh(library(dplyr))
shhh(library(ggplot2))
shhh(library(lme4))
shhh(library(tidymv))
shhh(library(gamlss))
shhh(library(gsubfn))
shhh(library(lmerTest))
shhh(library(tidyverse))
shhh(library(boot))
shhh(library(rsample))
shhh(library(plotrix))
shhh(library(ggrepel))
# (A second, redundant library(mgcv) load was removed; it is loaded once above.)
shhh(library(brms))
shhh(library(bayesplot))
shhh(library(patchwork))
shhh(library(MASS))
shhh(library(tidyr))
shhh(library(extraDistr))
shhh(library(purrr))
# For exercises with Stan code
shhh(library(rstan))
options(mc.cores = parallel::detectCores())
rstan_options(auto_write = FALSE)
# car and coda wrapped in shhh() for consistency with every other load above.
shhh(library(car))
shhh(library(coda))
shhh(library(gridExtra))
theme_set(theme_bw())
# Print with 4 significant digits; suppress scientific notation.
options(digits = 4)
options(scipen = 999)
set.seed(444)
# Emit a status message from the middle of a pipe without breaking the chain.
pipe_message <- function(.data, status) {
  message(status)
  .data
}

# Presentation rate used to locate the data directory (f160 = 160 wpm files).
rate <- 160
file_prefix <- "../reading_measures/provo/provo_f160/"
fnames <- list.files(path = file_prefix)

# Read every per-subject reading-measures file and bind them once.
# file.path() avoids the double slash paste0(prefix, "/", f) produced, and a
# single bind_rows() avoids the O(n^2) rbind-in-a-loop growth pattern.
df <- fnames %>%
  lapply(function(f) {
    read.csv(file.path(file_prefix, f)) %>%
      mutate(subj = str_remove(f, "_reading_measures.csv"))
  }) %>%
  bind_rows()
# Filter out readers who don't answer the comprehension questions correctly.
# Step 1: one correctness flag per (paragraph, subject) pair.
para_correct <- df %>%
  group_by(para_nr, subj) %>%
  summarise(correct = if_else(unique(correctness) == 1, 1, 0)) %>%
  ungroup() %>%
  drop_na()

# Step 2: per-subject proportion of correctly answered paragraphs.
filter_df <- para_correct %>%
  group_by(subj) %>%
  summarise(p_correct = mean(correct)) %>%
  ungroup() %>%
  mutate(p_correct = round(p_correct, digits = 2))
`summarise()` has grouped output by 'para_nr'. You can override using the `.groups` argument.
# Keep only readers below 80% comprehension accuracy; these will be excluded.
filter_df <- dplyr::filter(filter_df, p_correct < 0.8)
# Excluded subjects: reader_3 (0.70), reader_60 (0.79), reader_76 (0.72),
# reader_256 (0.71), reader_262 (0.57).
filter_list <- filter_df$subj
# Clean the MoTR reading measures for the retained subjects.
raw_df <- df %>%
  filter(!subj %in% filter_list) %>%
  mutate(
    word = str_trim(word),
    subj = as.character(str_remove(subj, "reader_")),
    # A skipped word (total_duration == 0) carries no first-pass regression
    # information, so flag FPReg with -1 as an "NA" stand-in.
    FPReg = if_else(total_duration == 0, -1, FPReg)
  ) %>%
  dplyr::select(expr_id, cond_id, para_nr, word, word_nr, first_duration,
                total_duration, gaze_duration, go_past_time, FPReg, subj)
# How many subjects survive the comprehension filter?
length(unique(raw_df$subj))
[1] 46
# Sanity check: mean first-pass regression rate among retained readers,
# counting only rows where FPReg was actually observed (>= 0).
df %>%
  filter(!subj %in% filter_list) %>%
  filter(FPReg >= 0) %>%
  dplyr::select(FPReg) %>%
  drop_na() %>%
  summarise(m = mean(FPReg))
# Sanity check: mean first-pass fixation rate among retained readers.
df %>%
  filter(!subj %in% filter_list) %>%
  dplyr::select(FPFix) %>%
  drop_na() %>%
  summarise(m = mean(FPFix))
NA
NA
# Average across subjects
# Aggregate MoTR measures: one mean value per (paragraph, word, metric),
# after dropping unobservable FPReg rows and trimming per-word outliers.
motr_agg_df = raw_df %>%
# Long format: columns 6-10 hold the five reading measures.
gather(metric, value, 6:10) %>%
filter(value >= 0) %>% #Removes the "NA" values for FPReg
# ==== Remove skipped words
# mutate(zero = if_else(metric != "FPReg" & value == 0,T, F)) %>%
# filter(zero == F) %>%
drop_na() %>%
group_by(para_nr, word_nr, word, metric) %>%
# Trim values > mean + 3 SD within each word x metric cell; FPReg is a 0/1
# outcome and is left untrimmed.
mutate(outlier = if_else(metric != "FPReg" & value > (mean(value) + 3 * sd(value)), T, F)) %>% filter(outlier == F) %>%
# nsubj records how many subjects contribute to each aggregated cell.
summarise(value = mean(value), nsubj = length(unique(subj))) %>%
ungroup() %>%
arrange(para_nr, word_nr) %>%
# Rename to the shared schema used when merging with the Provo data below.
rename(text_id = para_nr, word_text_idx = word_nr, motr_value = value)
`summarise()` has grouped output by 'para_nr', 'word_nr', 'word'. You can override using the `.groups` argument.
# View(motr_agg_df)
# write.csv(motr_agg_df, file = "/Users/cui/Desktop/MoTR/pipeline/ancillary_data/motr_agg_df.csv", row.names = FALSE)
# Word-level predictors (frequency, surprisal, length) for the Provo corpus.
provo_modeling_df <- "../ancillary_data/provo_df.csv" %>%
  read.csv() %>%
  dplyr::select(text_id, sent_id, trigger_idx, word, freq, surp, len) %>%
  rename(word_idx = trigger_idx)
provo_modeling_df
# Read in Provo eyetracking data
provo_raw_df = read.csv("../ancillary_data/provo_eyetracking.csv")
# unique(provo_raw_df$Participant_ID)
# length(unique(provo_raw_df$Participant_ID))
# Harmonize the Provo eyetracking columns with the MoTR naming scheme and
# recode skipped words so both datasets treat them the same way.
provo_eyetracking_df = provo_raw_df %>%
dplyr::select(Participant_ID, Text_ID, Sentence_Number, Word_In_Sentence_Number, Word, Word_Number, IA_FIRST_FIX_PROGRESSIVE, IA_FIRST_RUN_DWELL_TIME, IA_DWELL_TIME, IA_REGRESSION_PATH_DURATION, IA_REGRESSION_OUT, IA_SKIP) %>%
rename( #first_duration = IA_FIRST_FIXATION_DURATION,
gaze_duration = IA_FIRST_RUN_DWELL_TIME,
total_duration = IA_DWELL_TIME,
go_past_time = IA_REGRESSION_PATH_DURATION,
subj = Participant_ID,
text_id = Text_ID,
sent_id = Sentence_Number,
word_idx = Word_In_Sentence_Number,
word_text_idx = Word_Number, # IA_ID?
word = Word, # Word?
FPReg = IA_REGRESSION_OUT,
skip = IA_SKIP,
ff_progressive = IA_FIRST_FIX_PROGRESSIVE) %>%
# NOTE(review): first_duration is copied from gaze_duration here (the true
# IA_FIRST_FIXATION_DURATION column is commented out above) — confirm intended.
mutate(first_duration = gaze_duration) %>%
# If the first fixation was not progressive, zero out the first-pass measures.
mutate(gaze_duration = if_else(ff_progressive == 0, 0, as.double(gaze_duration)),
go_past_time = if_else(ff_progressive == 0, 0, as.double(go_past_time))) %>%
dplyr::select(-ff_progressive) %>%
# Skipped words (total_duration == 0): zero the durations and set FPReg to -1
# (an "NA" stand-in, matching the MoTR preprocessing above).
mutate(
gaze_duration = if_else(total_duration == 0, 0, as.double(gaze_duration)),
go_past_time = if_else(total_duration == 0, 0, as.double(go_past_time)),
FPReg = if_else(total_duration == 0, -1, as.double(FPReg)),
first_duration = if_else(total_duration == 0, 0, as.double(first_duration)),
) %>%
# drop_na() %>% # will drop the whole row with all the metrics
# Long format: columns 7-12 hold the six measures.
gather(metric, value, 7:12) %>%
filter(value >= 0) %>% # filter skipped word in eye tracking data for FPReg
# ==== Remove skipped words
# mutate(zero = if_else(metric != "FPReg" & value == 0,T, F)) %>%
# filter(zero == F) %>%
# mutate(value = if_else(is.na(value), as.integer(0), as.integer(value))) %>%
# mutate(value = if_else(metric != "FPReg" & is.na(value), as.integer(0), as.integer(value))) %>%
drop_na() %>%
mutate(word = str_trim(word)) %>%
mutate(subj = str_remove(subj, "Sub")) %>%
mutate(subj = as.integer(subj)) %>%
group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
# Per-word outlier trimming (> mean + 3 SD) for duration metrics only.
mutate(outlier = if_else(metric != "FPReg" & metric != "skip" & value > (mean(value) + 3 * sd(value) ), T, F)) %>%
filter(outlier == F) %>%
ungroup() #%>%
# Cross-participant aggregation: mean value per word x metric, plus the
# number of contributing subjects.
provo_eyetracking_agg_df <- provo_eyetracking_df %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
  summarise(
    value = mean(value),
    nsubj = n_distinct(subj)
  ) %>%
  ungroup()
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
# View(provo_eyetracking_df)
# View(provo_eyetracking_agg_df)
# write.csv(provo_eyetracking_agg_df, file = "/Users/cui/Desktop/MoTR/pipeline/ancillary_data/provo_eyetracking_agg_df.csv", row.names = FALSE)
# Sanity check: overall first-pass regression rate in the raw eyetracking data.
provo_raw_df %>%
  dplyr::select(IA_REGRESSION_OUT) %>%
  drop_na() %>%
  summarise(m = mean(IA_REGRESSION_OUT))
# Sanity check: overall skipping rate in the raw eyetracking data.
provo_raw_df %>%
  dplyr::select(IA_SKIP) %>%
  drop_na() %>%
  summarise(m = mean(IA_SKIP))
NA
NA
# Split the eyetracking data in two halves by subject ID to estimate how well
# the measure correlates with itself. First half: subjects with ID <= 42.
provo_eyetracking_subj1_df_temp <- provo_eyetracking_df %>%
  filter(subj <= 42) %>%
  # Shift word numbering down by one so it lines up with motr_agg_df's indices.
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
  summarise(value = mean(value)) %>%
  ungroup() %>%
  rename(value_1 = value)
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
# dplyr::select(-sent_id, -word_idx)
# View(provo_eyetracking_subj1_df_temp)
# Attach the MoTR aggregates, drop word ranges that are misaligned between the
# datasets (apparently tokenization mismatches), and keep only what the
# split-half comparison needs.
provo_eyetracking_subj1_df <-
  merge(provo_eyetracking_subj1_df_temp, motr_agg_df,
        by = c("text_id", "word_text_idx", "metric")) %>%
  arrange(text_id, sent_id, word_idx) %>%
  filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
  rename(word = word.y) %>%
  dplyr::select(text_id, word_text_idx, metric, word, value_1)
# Second half of the split: subjects with ID above 42, aggregated the same way.
provo_eyetracking_subj2_df <- provo_eyetracking_df %>%
  filter(subj > 42) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  group_by(text_id, word_text_idx, sent_id, word_idx, word, metric) %>%
  summarise(value = mean(value)) %>%
  ungroup() %>%
  rename(value_2 = value) %>%
  dplyr::select(-sent_id, -word_idx)
`summarise()` has grouped output by 'text_id', 'word_text_idx', 'sent_id', 'word_idx', 'word'. You can override using the `.groups` argument.
# View(provo_eyetracking_subj2_df)
# Pair the two subject halves word-by-word, trim per-metric outliers
# (> mean + 3 SD, duration metrics only), and stack the two value columns
# long for the split-half comparison. (Despite the variable names, BOTH
# columns here come from the eyetracking data: value_1/value_2 are the two
# subject halves.) T/F replaced with TRUE/FALSE — T and F are reassignable.
provo_eyetr_grouped_df <-
  merge(provo_eyetracking_subj2_df, provo_eyetracking_subj1_df,
        by = c("text_id", "word_text_idx", "metric")) %>%
  # filter(word.x == word.y) %>%
  dplyr::select(-word.y) %>%
  group_by(metric) %>%
  mutate(motr_outlier = if_else(metric != "FPReg" & metric != "skip" & value_1 > (mean(value_1) + 3 * sd(value_1)), TRUE, FALSE)) %>%
  filter(motr_outlier == FALSE) %>%
  mutate(eyetr_outlier = if_else(metric != "FPReg" & metric != "skip" & value_2 > (mean(value_2) + 3 * sd(value_2)), TRUE, FALSE)) %>%
  filter(eyetr_outlier == FALSE) %>%
  ungroup() %>%
  gather(measure, value, c("value_1", "value_2")) %>%
  dplyr::select(-motr_outlier, -eyetr_outlier)
# View(provo_eyetr_grouped_df)
# Merge aggregated eyetracking, the modeling predictors, and the MoTR
# aggregates into one word x metric table for the correlation analyses.
provo_df <- merge(provo_eyetracking_agg_df, provo_modeling_df,
                  by = c("text_id", "sent_id", "word_idx")) %>%
  mutate(word_text_idx = as.integer(word_text_idx - 1)) %>%
  arrange(text_id, sent_id, word_idx) %>%
  rename(eyetr_value = value)
provo_df <- merge(provo_df, motr_agg_df, by = c("text_id", "word_text_idx", "metric")) %>%
  arrange(text_id, sent_id, word_idx) %>%
  # almost all the word.x != word.y is because of normalization problem, so we can keep them, instead, deleting some special cases
  filter(!(text_id == 13 & word_text_idx >= 20 & word_text_idx <= 52)) %>%
  filter(!(text_id == 3 & word_text_idx >= 46 & word_text_idx <= 57)) %>%
  # filter(word.x == word) #%>%
  dplyr::select(-word.x, -word.y) %>%
  group_by(metric) %>%
  # Per-metric outlier trimming (> mean + 3 SD); FPReg is a mean of 0/1
  # outcomes and is left untrimmed. T/F replaced with TRUE/FALSE (T and F
  # are reassignable and therefore unsafe).
  mutate(motr_outlier = if_else(metric != "FPReg" & motr_value > (mean(motr_value) + 3 * sd(motr_value)), TRUE, FALSE)) %>%
  filter(motr_outlier == FALSE) %>%
  mutate(eyetr_outlier = if_else(metric != "FPReg" & eyetr_value > (mean(eyetr_value) + 3 * sd(eyetr_value)), TRUE, FALSE)) %>%
  filter(eyetr_outlier == FALSE) %>%
  ungroup() %>%
  gather(measure, value, c("eyetr_value", "motr_value")) %>%
  dplyr::select(-motr_outlier, -eyetr_outlier)
# View(provo_df)
# provo_df
# Gaze duration: wide table with one eyetracking and one MoTR column per word.
print("Gaze Duration")
[1] "Gaze Duration"
gd_df = provo_df %>% filter(metric == "gaze_duration") %>%
spread(measure, value) %>%
# smoothing, if includes 0s
# Floor at 1 ms so the log transform below is defined for zero durations.
mutate(eyetr_value = pmax(eyetr_value, 1),
motr_value = pmax(motr_value, 1)
) %>%
mutate(eyetr_value_log = log(eyetr_value),
motr_value_log = log(motr_value))
# Frequentist Pearson correlations on the raw and log scales.
print(cor.test(gd_df$eyetr_value, gd_df$motr_value)$estimate)
cor
0.7874
print(cor.test(gd_df$eyetr_value_log, gd_df$motr_value_log)$estimate)
cor
0.6055
# View(gd_df)
# Density of each raw and log-transformed measure, one panel per measure
# (columns 12-15 of gd_df hold the four value columns).
gd_density_plot <- gd_df %>%
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")
gd_density_plot
NA
# Matrices for Stan: raw values as-is, log values centered at zero per column.
gd_temp <- data.matrix(gd_df[c("eyetr_value", "motr_value")])
gd_temp_log <- gd_df[c("eyetr_value_log", "motr_value_log")] %>%
  mutate(
    eyetr_value_log = eyetr_value_log - mean(eyetr_value_log),
    motr_value_log = motr_value_log - mean(motr_value_log)
  ) %>%
  data.matrix()
# Side-by-side scatterplots: raw vs centered log-transformed gaze durations.
par(mfrow = c(1, 2))
plot(gd_temp, pch = 16, col = "blue", main = "Not Log-Transformed")
plot(gd_temp_log, pch = 16, col = "red", main = "Centered Log-Transformed")
# Fit the bivariate correlation model to the raw gaze-duration matrix.
gd_data <- list(x = gd_temp, N = nrow(gd_temp))
fit_gd <- stan(
  file = "stan_models/bivariate_correlation.stan",
  data = gd_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444,
  # control=list(adapt_delta=0.99),
  verbose = FALSE
)
# Replace the compiled DSO with an empty one before serializing the fit.
fit_gd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gd, file = "motr_eyetr_gaze_duration_cor_drop0s.rds")
# Go-past time: same wide-table construction as for gaze duration above.
print("Go Past Time")
[1] "Go Past Time"
gpt_df = provo_df %>% filter(metric == "go_past_time") %>%
spread(measure, value) %>%
# smoothing, if includes 0s
# Floor at 1 ms so the log transform below is defined for zero durations.
mutate(eyetr_value = pmax(eyetr_value, 1),
motr_value = pmax(motr_value, 1)
) %>%
mutate(eyetr_value_log = log(eyetr_value),
motr_value_log = log(motr_value))
# Frequentist Pearson correlations on the raw and log scales.
print(cor.test(gpt_df$eyetr_value, gpt_df$motr_value)$estimate)
cor
0.7292
print(cor.test(gpt_df$eyetr_value_log, gpt_df$motr_value_log)$estimate)
cor
0.5889
# Density of each raw and log-transformed go-past measure.
gpt_density_plot <- gpt_df %>%
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")
gpt_density_plot
# Matrices for Stan: raw values, and per-column-centered log values.
gpt_temp <- data.matrix(gpt_df[c("eyetr_value", "motr_value")])
gpt_temp_log <- gpt_df[c("eyetr_value_log", "motr_value_log")] %>%
  mutate(
    eyetr_value_log = eyetr_value_log - mean(eyetr_value_log),
    motr_value_log = motr_value_log - mean(motr_value_log)
  ) %>%
  data.matrix()
# Side-by-side scatterplots of the two representations.
par(mfrow = c(1, 2))
plot(gpt_temp, pch = 16, col = "blue", main = "Not Log-Transformed")
plot(gpt_temp_log, pch = 16, col = "red", main = "Centered Log-Transformed")
# -------fit model go past time ----------
gpt_data <- list(x = gpt_temp, N = nrow(gpt_temp))
fit_gpt <- stan(
  file = "stan_models/bivariate_correlation.stan",
  data = gpt_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444,
  # control=list(adapt_delta=0.99),
  verbose = FALSE
)
# Replace the compiled DSO with an empty one before serializing the fit.
fit_gpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_gpt, file = "motr_eyetr_go_past_time_cor_drop0s.rds")
# Total duration: same wide-table construction as for gaze duration above.
print("Total Duration")
[1] "Total Duration"
td_df = provo_df %>% filter(metric == "total_duration") %>%
spread(measure, value) %>%
# smoothing, if includes 0s
# Floor at 1 ms so the log transform below is defined for zero durations.
mutate(eyetr_value = pmax(eyetr_value, 1),
motr_value = pmax(motr_value, 1)
) %>%
mutate(eyetr_value_log = log(eyetr_value),
motr_value_log = log(motr_value))
# Frequentist Pearson correlations on the raw and log scales.
print(cor.test(td_df$eyetr_value, td_df$motr_value)$estimate)
cor
0.7601
print(cor.test(td_df$eyetr_value_log, td_df$motr_value_log)$estimate)
cor
0.6421
# Density of each raw and log-transformed total-duration measure.
td_density_plot <- td_df %>%
  gather(measure, value, 12:15) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")
td_density_plot
# Matrices for Stan: raw values, and per-column-centered log values.
td_temp <- data.matrix(td_df[c("eyetr_value", "motr_value")])
td_temp_log <- td_df[c("eyetr_value_log", "motr_value_log")] %>%
  mutate(
    eyetr_value_log = eyetr_value_log - mean(eyetr_value_log),
    motr_value_log = motr_value_log - mean(motr_value_log)
  ) %>%
  data.matrix()
# Side-by-side scatterplots of the two representations.
par(mfrow = c(1, 2))
plot(td_temp, pch = 16, col = "blue", main = "Not Log-Transformed")
plot(td_temp_log, pch = 16, col = "red", main = "Centered Log-Transformed")
# -------fit model total duration ----------
td_data <- list(x = td_temp, N = nrow(td_temp))
fit_td <- stan(
  file = "stan_models/bivariate_correlation.stan",
  data = td_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444,
  # control=list(adapt_delta=0.99),
  verbose = FALSE
)
# Replace the compiled DSO with an empty one before serializing the fit.
fit_td@stanmodel@dso <- new("cxxdso")
# NOTE(review): unlike the other fits this one is saved WITHOUT the "_drop0s"
# suffix — confirm whether that is intentional.
saveRDS(fit_td, file = "motr_eyetr_total_duration_cor.rds")
# First-pass regression: per-word regression probabilities in both measures.
print("First Pass Regression Prob.")
[1] "First Pass Regression Prob."
reg_df = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
# filter(eyetr_value > 0, motr_value > 0)
# Floor at 1e-5 — presumably so the beta-model fit below only sees values
# strictly inside (0, 1); confirm against the Stan model's support.
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5))
print(cor.test(reg_df$eyetr_value, reg_df$motr_value)$estimate)
cor
0.2454
# View(reg_df)
# Densities of the two regression-probability measures (columns 12-13).
reg_density_plot <- reg_df %>%
  gather(measure, value, 12:13) %>%
  ggplot(aes(x = value)) +
  geom_density() +
  facet_wrap(~measure, scales = "free") +
  theme_bw() +
  scale_fill_brewer(palette = "Set1")
reg_density_plot
reg_temp <- data.matrix(reg_df[c("eyetr_value", "motr_value")])
# Single scatterplot (no log transform for probabilities).
par(mfrow = c(1, 2))
plot(reg_temp, pch = 16, col = "blue", main = "Not Log-Transformed")
# -------fit model FPReg ----------
reg_data <- list(x = reg_temp, N = nrow(reg_temp))
fit_reg <- stan(
  file = "stan_models/bivariate_beta_correlation_reg.stan",
  data = reg_data,
  iter = 4000,
  chains = 4,
  cores = 8,
  seed = 444,
  # control=list(adapt_delta=0.99),
  verbose = FALSE
)
# Replace the compiled DSO with an empty one before serializing the fit.
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = "motr_eyetr_FPReg_cor_drop0s.rds")
# models with all 0s
# Reload fits that were run on data INCLUDING zeros.
# NOTE(review): this overwrites the fit_* objects created above (which were
# saved under *_drop0s names) — confirm which model set the report should use.
fit_gd = readRDS("./motr_eyetr_gaze_duration_cor.rds")
fit_gpt = readRDS("./motr_eyetr_go_past_time_cor.rds")
fit_td = readRDS("./motr_eyetr_total_duration_cor.rds")
fit_reg = readRDS("./motr_eyetr_FPReg_cor.rds")
# models for drop 0s
# fit_gd = readRDS("./motr_eyetr_gaze_duration_cor_drop0s.rds")
# fit_gpt = readRDS("./motr_eyetr_go_past_time_cor_drop0s.rds")
# fit_td = readRDS("./motr_eyetr_total_duration_cor_drop0s.rds")
# fit_reg = readRDS("./motr_eyetr_FPReg_cor_drop0s.rds")
# Posterior summary tables for each fitted correlation model.
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
print(fit_gd)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 125.79 0.01 1.06 123.70 125.05 125.78 126.52 127.85 6415 1
mu[2] 201.36 0.03 2.02 197.35 200.00 201.35 202.73 205.33 6243 1
sigma[1] 75.24 0.01 0.85 73.59 74.67 75.23 75.80 76.93 5554 1
sigma[2] 141.51 0.02 1.62 138.34 140.42 141.51 142.60 144.67 5947 1
nu 67.21 0.20 17.95 39.60 54.39 64.56 77.13 109.21 8434 1
rho 0.82 0.00 0.01 0.80 0.81 0.82 0.82 0.83 6766 1
cov[1,1] 5662.24 1.72 128.14 5416.22 5575.11 5659.86 5745.30 5917.61 5551 1
cov[1,2] 8685.45 3.11 212.25 8279.31 8538.73 8681.39 8824.04 9105.51 4657 1
cov[2,1] 8685.45 3.11 212.25 8279.31 8538.73 8681.39 8824.04 9105.51 4657 1
cov[2,2] 20027.42 5.94 457.93 19138.56 19716.41 20025.43 20333.73 20929.67 5948 1
x_rand[1] 138.57 0.76 67.06 20.92 90.39 135.44 181.71 278.91 7876 1
x_rand[2] 228.21 1.42 123.41 23.91 135.54 219.29 310.19 484.45 7563 1
attempt 0.11 0.00 0.34 0.00 0.00 0.00 0.00 1.00 8077 1
max_attempts 1000.00 NaN 0.00 1000.00 1000.00 1000.00 1000.00 1000.00 NaN NaN
lp__ -55825.25 0.03 1.73 -55829.39 -55826.17 -55824.95 -55823.98 -55822.84 3949 1
Samples were drawn using NUTS(diag_e) at Sat Jul 22 14:54:30 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# Posterior summary for the go-past-time model.
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
print(fit_gpt)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 147.00 0.02 1.50 144.05 145.98 147.01 148.00 149.89 6405 1
mu[2] 205.47 0.03 2.45 200.61 203.83 205.51 207.14 210.20 6441 1
sigma[1] 94.99 0.02 1.29 92.47 94.13 95.01 95.86 97.55 5688 1
sigma[2] 155.50 0.03 2.15 151.26 154.09 155.49 156.95 159.73 5766 1
nu 7.54 0.01 0.57 6.53 7.14 7.50 7.89 8.75 6096 1
rho 0.80 0.00 0.01 0.79 0.80 0.80 0.81 0.82 7275 1
cov[1,1] 9025.61 3.26 245.47 8551.40 8860.01 9025.97 9188.35 9516.81 5687 1
cov[1,2] 11835.28 4.75 338.12 11168.95 11610.06 11836.87 12061.87 12507.53 5075 1
cov[2,1] 11835.28 4.75 338.12 11168.95 11610.06 11836.87 12061.87 12507.53 5075 1
cov[2,2] 24186.07 8.79 667.52 22879.89 23743.24 24176.78 24634.26 25514.54 5770 1
x_rand[1] 174.00 1.04 93.42 23.81 107.45 163.64 228.93 381.05 8038 1
x_rand[2] 250.51 1.65 147.29 25.35 144.88 232.92 337.60 580.10 8015 1
attempt 0.17 0.00 0.45 0.00 0.00 0.00 0.00 1.00 8278 1
max_attempts 1000.00 NaN 0.00 1000.00 1000.00 1000.00 1000.00 1000.00 NaN NaN
lp__ -57729.27 0.03 1.72 -57733.34 -57730.21 -57728.95 -57728.00 -57726.89 3578 1
Samples were drawn using NUTS(diag_e) at Sat Jul 22 15:12:39 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# Posterior summary for the total-duration model.
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
print(fit_td)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 173.42 0.02 1.42 170.69 172.45 173.41 174.36 176.23 5366 1
mu[2] 250.34 0.03 2.63 245.22 248.58 250.33 252.11 255.51 5883 1
sigma[1] 95.97 0.02 1.23 93.60 95.16 95.97 96.77 98.44 5507 1
sigma[2] 177.77 0.03 2.34 173.20 176.17 177.73 179.36 182.38 5591 1
nu 21.87 0.06 4.44 15.31 18.76 21.22 24.20 32.11 4880 1
rho 0.80 0.00 0.01 0.79 0.79 0.80 0.80 0.81 7369 1
cov[1,1] 9212.50 3.18 235.62 8760.98 9054.56 9209.71 9364.36 9690.76 5503 1
cov[1,2] 13638.88 5.20 366.78 12944.80 13389.27 13630.18 13883.70 14380.99 4976 1
cov[2,1] 13638.88 5.20 366.78 12944.80 13389.27 13630.18 13883.70 14380.99 4976 1
cov[2,2] 31608.56 11.12 830.86 29999.18 31035.03 31589.54 32169.43 33263.08 5587 1
x_rand[1] 190.27 0.98 88.62 36.10 126.07 185.26 247.04 375.18 8190 1
x_rand[2] 285.15 1.82 158.43 27.54 166.89 270.34 384.29 636.46 7595 1
attempt 0.11 0.00 0.35 0.00 0.00 0.00 0.00 1.00 7801 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -58397.55 0.03 1.77 -58401.90 -58398.49 -58397.22 -58396.27 -58395.11 3901 1
Samples were drawn using NUTS(diag_e) at Sat Jul 22 16:14:49 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# Posterior summary for the first-pass-regression (beta) model.
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.--------------------------------------------"
print(fit_reg)
Inference for Stan model: bivariate_correlation_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
alpha[1] 0.85 0.00 0.02 0.81 0.84 0.85 0.86 0.89 5443 1
alpha[2] 0.15 0.00 0.00 0.14 0.14 0.15 0.15 0.15 7702 1
beta[1] 4.64 0.00 0.14 4.37 4.54 4.64 4.73 4.92 5975 1
beta[2] 2.52 0.00 0.12 2.29 2.44 2.52 2.60 2.76 7877 1
L[1,1] 1.00 NaN 0.00 1.00 1.00 1.00 1.00 1.00 NaN NaN
L[1,2] 0.00 NaN 0.00 0.00 0.00 0.00 0.00 0.00 NaN NaN
L[2,1] 0.45 0.00 0.38 -0.41 0.18 0.52 0.77 0.95 8489 1
L[2,2] 0.78 0.00 0.21 0.31 0.64 0.85 0.96 1.00 6201 1
mu[1] 2.34 0.00 0.05 2.25 2.31 2.34 2.37 2.44 5468 1
mu[2] 1.16 0.00 0.00 1.15 1.16 1.16 1.16 1.16 7701 1
sigma[1] 104.39 0.19 14.73 78.70 93.94 103.18 113.48 136.81 5984 1
sigma[2] 12.55 0.02 1.50 9.91 11.50 12.44 13.51 15.76 7872 1
rho[1,1] 1.00 NaN 0.00 1.00 1.00 1.00 1.00 1.00 NaN NaN
rho[1,2] 0.45 0.00 0.38 -0.41 0.18 0.52 0.77 0.95 8489 1
rho[2,1] 0.45 0.00 0.38 -0.41 0.18 0.52 0.77 0.95 8489 1
rho[2,2] 1.00 0.00 0.00 1.00 1.00 1.00 1.00 1.00 592 1
Sigma[1,1] 11114.32 41.44 3195.37 6193.09 8824.35 10646.59 12878.40 18717.61 5946 1
Sigma[1,2] 583.46 5.69 518.49 -527.26 234.91 654.32 971.41 1433.33 8291 1
Sigma[2,1] 583.46 5.69 518.49 -527.26 234.91 654.32 971.41 1433.33 8291 1
Sigma[2,2] 159.88 0.44 38.81 98.15 132.31 154.69 182.47 248.29 7789 1
x_rand[1] 0.16 0.00 0.14 0.00 0.05 0.12 0.23 0.53 7955 1
x_rand[2] 0.06 0.00 0.12 0.00 0.00 0.00 0.05 0.45 7762 1
lp__ 14798.20 0.03 1.56 14794.21 14797.42 14798.55 14799.33 14800.22 3508 1
Samples were drawn using NUTS(diag_e) at Sat Jul 22 20:39:27 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# stan_trace(fit_gd, pars=c("rho", "mu", "sigma", "nu"))
# stan_dens(fit_gd, pars=c("rho", "mu", "sigma", "nu"), separate_chains = TRUE)
# stan_plot(fit_gd, pars=c("rho", "mu", "sigma", "nu"))
# MCMC diagnostics for every fitted model, in order: Gaze Duration,
# Go Past Time, Total Duration, FPReg — trace plots, per-chain densities,
# and posterior interval plots.
for (fit in list(fit_gd, fit_gpt, fit_td, fit_reg)) {
  print(stan_trace(fit))
  print(stan_dens(fit, separate_chains = TRUE))
  print(stan_plot(fit))
}
# Individual trace/density panels for the gaze-duration fit, one parameter per
# pair, intended for a combined figure via grid.arrange() (commented out below).
p1 <- stan_trace(fit_gd, pars = 'rho', inc_warmup = FALSE)
p2 <- stan_dens(fit_gd, pars = 'rho', separate_chains = TRUE)
p3 <- stan_trace(fit_gd, pars = 'mu[1]', inc_warmup = FALSE)
p4 <- stan_dens(fit_gd, pars = 'mu[1]', separate_chains = TRUE)
p5 <- stan_trace(fit_gd, pars = 'mu[2]', inc_warmup = FALSE)
p6 <- stan_dens(fit_gd, pars = 'mu[2]', separate_chains = TRUE)
p7 <- stan_trace(fit_gd, pars = 'sigma[1]', inc_warmup = FALSE)
p8 <- stan_dens(fit_gd, pars = 'sigma[1]', separate_chains = TRUE)
p9 <- stan_trace(fit_gd, pars = 'sigma[2]', inc_warmup = FALSE)
p10 <- stan_dens(fit_gd, pars = 'sigma[2]', separate_chains = TRUE)
p11 <- stan_trace(fit_gd, pars = 'nu', inc_warmup = FALSE)
p12 <- stan_dens(fit_gd, pars = 'nu', separate_chains = TRUE)
# Use grid.arrange() to arrange the plots
# grid.arrange(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, ncol=2, nrow=6)
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
# Posterior summary of the correlation parameter for gaze duration.
rho_gd <- as.numeric(extract(fit_gd, "rho")[[1]])
rho_mean <- mean(rho_gd)  # renamed from `mean` to avoid masking base::mean()
crI <- quantile(rho_gd, c(.025, .975))
# 95% highest posterior density interval (renamed from the misleading `hpd99`).
hpd95 <- HPDinterval(as.mcmc(rho_gd), prob = 0.95)
# sep = "" moved to the end of the call; output is unchanged.
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.8156
HPD: [0.8029, 0.8278]
crI: [0.8029, 0.8278]
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
# Posterior summary of the correlation parameter for go-past time.
rho_gpt <- as.numeric(extract(fit_gpt, "rho")[[1]])
rho_mean <- mean(rho_gpt)  # renamed from `mean` to avoid masking base::mean()
crI <- quantile(rho_gpt, c(.025, .975))
# 95% highest posterior density interval (renamed from the misleading `hpd99`).
hpd95 <- HPDinterval(as.mcmc(rho_gpt), prob = 0.95)
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.8011
HPD: [0.7863, 0.8161]
crI: [0.7857, 0.8157]
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
# Posterior summary of the correlation parameter for total duration.
rho_td <- as.numeric(extract(fit_td, "rho")[[1]])
rho_mean <- mean(rho_td)  # renamed from `mean` to avoid masking base::mean()
crI <- quantile(rho_td, c(.025, .975))
# 95% highest posterior density interval (renamed from the misleading `hpd99`).
hpd95 <- HPDinterval(as.mcmc(rho_td), prob = 0.95)
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.7993
HPD: [0.7847, 0.8127]
crI: [0.785, 0.8131]
print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
# Posterior summary of the off-diagonal correlation for the beta model.
rho_reg <- as.numeric(extract(fit_reg, "rho[1, 2]")[[1]])
rho_mean <- mean(rho_reg)  # renamed from `mean` to avoid masking base::mean()
crI <- quantile(rho_reg, c(.025, .975))
# 95% highest posterior density interval (renamed from the misleading `hpd99`).
hpd95 <- HPDinterval(as.mcmc(rho_reg), prob = 0.95)
# NB: this cat() intentionally has no trailing newline, matching the original.
cat("Mean: ", rho_mean, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]", sep = "")
Mean: 0.4456
HPD: [-0.2832, 0.9833]
crI: [-0.4076, 0.9497]
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
# Posterior predictive draws for gaze duration, plotted over the data.
gd_rand <- extract(fit_gd, "x_rand")[[1]]
# Blank canvas with fixed limits, then layer points and credible ellipses.
plot(1, 1, xlim = c(0, 400), ylim = c(0, 700), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Gaze Duration")
points(gd_rand[, 1], gd_rand[, 2], col = "black", pch = 16)  # posterior draws
points(gd_temp, pch = 16, col = "red")                       # observed data
dataEllipse(gd_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(gd_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
# Posterior predictive draws for go-past time, plotted over the data.
gpt_rand <- extract(fit_gpt, "x_rand")[[1]]
plot(1, 1, xlim = c(0, 800), ylim = c(0, 1200), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Go Past Time")
points(gpt_rand[, 1], gpt_rand[, 2], col = "black", pch = 16)  # posterior draws
points(gpt_temp, pch = 16, col = "red")                        # observed data
dataEllipse(gpt_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(gpt_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
# Posterior predictive draws for total duration, plotted over the data.
td_rand <- extract(fit_td, "x_rand")[[1]]
plot(1, 1, xlim = c(0, 800), ylim = c(0, 1200), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "Total Duration")
points(td_rand[, 1], td_rand[, 2], col = "black", pch = 16)  # posterior draws
points(td_temp, pch = 16, col = "red")                       # observed data
dataEllipse(td_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(td_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")
print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
# Posterior predictive draws for FPReg (probabilities), plotted over the data.
reg_rand <- extract(fit_reg, "x_rand")[[1]]
plot(1, 1, xlim = c(0, 1), ylim = c(0, 1), type = "n",
     xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
points(reg_rand[, 1], reg_rand[, 2], col = "black", pch = 16)  # posterior draws
points(reg_temp, pch = 16, col = "red")                        # observed data
dataEllipse(reg_rand, levels = c(0.5, 0.75), fill = TRUE, plot.points = FALSE, col = "orange")
dataEllipse(reg_rand, levels = c(0.95, 0.99), fill = TRUE, plot.points = FALSE, col = "blue")
NA
NA
print("First Pass Regression Prob. all and < 0.3")
[1] "First Pass Regression Prob. all and < 0.3"
# Variants of the FPReg table for sensitivity analyses:
#  - reg_df_all:        all words, with z-scored copies of both columns
#  - reg_df_all_drop0s: only words with nonzero probability in BOTH measures
#  - reg_df_low_drop0:  nonzero words with eyetracking probability < 0.3
#  - reg_df_low:        all words (floored at 1e-5), eyetracking prob. < 0.3
reg_df_all <- provo_df %>%
  filter(metric == "FPReg") %>%
  spread(measure, value) %>%
  mutate(s_eyetr_value = scale(eyetr_value),
         s_motr_value = scale(motr_value))

reg_df_all_drop0s <- provo_df %>%
  filter(metric == "FPReg") %>%
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(s_eyetr_value = scale(eyetr_value),
         s_motr_value = scale(motr_value))

reg_df_low_drop0 <- provo_df %>%
  filter(metric == "FPReg") %>%
  spread(measure, value) %>%
  filter(eyetr_value > 0, motr_value > 0) %>%
  mutate(eyetr_value = pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)

reg_df_low <- provo_df %>%
  filter(metric == "FPReg") %>%
  spread(measure, value) %>%
  mutate(eyetr_value = pmax(eyetr_value, 1e-5),
         motr_value = pmax(motr_value, 1e-5)) %>%
  filter(eyetr_value < 0.3)
# View(reg_df)
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$estimate)
cor
0.2454
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$p.value)
[1] 0.000000000000000000000000000000000004739
print(cor.test(reg_df_all$s_eyetr_value, reg_df_all$s_motr_value)$estimate)
cor
0.2454
print(cor.test(reg_df_all$s_eyetr_value, reg_df_all$s_motr_value)$p.value)
[1] 0.000000000000000000000000000000000004739
print("--------------------------------------------------------------------")
[1] "--------------------------------------------------------------------"
print(cor.test(reg_df_all_drop0s$eyetr_value, reg_df_all_drop0s$motr_value)$estimate)
cor
0.3225
print(cor.test(reg_df_all_drop0s$eyetr_value, reg_df_all_drop0s$motr_value)$p.value)
[1] 0.00000000000000000000001143
print(cor.test(reg_df_all_drop0s$s_eyetr_value, reg_df_all_drop0s$s_motr_value)$estimate)
cor
0.3225
print(cor.test(reg_df_all_drop0s$s_eyetr_value, reg_df_all_drop0s$s_motr_value)$p.value)
[1] 0.00000000000000000000001143
# print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$estimate)
# print(cor.test(reg_df_low$eyetr_value, reg_df_low$motr_value)$p.value)
# print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$estimate)
# print(cor.test(reg_df_low_drop0$eyetr_value, reg_df_low_drop0$motr_value)$p.value)
# View(reg_df)
# Density of the two measures in the low (< 0.3) slice.
# NOTE(review): gather() is superseded (pivot_longer) and 12:13 selects
# columns by position -- fragile if the upstream frame gains/loses columns.
reg_df_low %>%
gather(measure, value, 12:13) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
# Numeric matrices (eyetr, motr) for the Stan fits and base-graphics plots below
reg_temp_all <- reg_df_all[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_all_scaled <- reg_df_all[c("s_eyetr_value", "s_motr_value")] %>% data.matrix()
reg_temp_low <- reg_df_low[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_low_drop0 <- reg_df_low_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Scatter of the raw vs z-scored (eyetr, motr) pairs
# plot(reg_temp_low, pch = 16, col = "blue",
# main = "Not Log-Transformed")
plot(reg_temp_all, pch = 16, col = "blue",
main = "Original data")
plot(reg_temp_all_scaled, pch = 16, col = "blue",
main = "0-1 scaled data")
# ------- fit bivariate model on ALL FPReg data (zeros included) ----------
# NOTE(review): the original header here said "FPReg < 0.3", but x is
# reg_temp_all; moreover the fit is saved below under an
# "..._all_data_drop0s.rds" filename even though no zero rows were dropped.
# Confirm which dataset this .rds is meant to contain before reusing it.
reg_data = list(x=reg_temp_all, N=nrow(reg_temp_all))
fit_reg = stan(
# file="stan_models/bivariate_beta_correlation_reg.stan",
file = "stan_models/bivariate_normal_reg.stan",
data=reg_data,
iter=4000,
chains=4,
cores=4,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model; blanking the DSO avoids serialising the compiled C++ object
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("motr_eyetr_FPReg_cor_all_data_drop0s.rds"))
print("First Pass Regression Prob. >= 0.3")
[1] "First Pass Regression Prob. >= 0.3"
# High-regression slice (eyetr >= 0.3): zeros dropped, values floored at 1e-5
reg_df_high_drop0 = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5)) %>%
filter(eyetr_value >= 0.3)
# Same slice with zero rows kept (still floored at 1e-5)
reg_df_high = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
# filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_value = pmax(eyetr_value, 1e-5),
motr_value = pmax(motr_value, 1e-5)) %>%
filter(eyetr_value >= 0.3)
# mutate(eyetr_value = exp(eyetr_value),
# motr_value = exp(motr_value)
# )
# View(reg_df)
# print("---------------------Pearson---------------------------")
# print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$estimate, method='Pearson')
# print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$p.value, method='Pearson')
# print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$estimate, method='Pearson')
# print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$p.value, method='Pearson')
# print("---------------------Spearman---------------------------")
# print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$estimate, method='Spearman')
# print(cor.test(reg_df_high$eyetr_value, reg_df_high$motr_value)$p.value, method='Spearman')
# print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$estimate, method='Spearman')
# print(cor.test(reg_df_high_drop0$eyetr_value, reg_df_high_drop0$motr_value)$p.value, method='Spearman')
# print("---------------------Scaled lm ---------------------------")
# m.scaled = lm(scale(motr_value) ~ 1 + scale(eyetr_value), data = reg_df_high)
# summary(m.scaled)
# View(reg_df)
# Density of both measures in the high (>= 0.3) slice; 12:13 is positional
reg_df_high %>%
gather(measure, value, 12:13) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
reg_temp_high <- reg_df_high[c("eyetr_value", "motr_value")] %>% data.matrix()
reg_temp_high_drop0 <- reg_df_high_drop0[c("eyetr_value", "motr_value")] %>% data.matrix()
# Set up the plotting area with three side-by-side plots: all / low / high
par(mfrow = c(1, 3))
# Scatter the three (eyetr, motr) matrices on the untransformed scale
plot(reg_temp_all, pch = 16, col = "blue",
main = "FPReg not logged all data")
plot(reg_temp_low, pch = 16, col = "blue",
main = "FPReg not logged eyetr < 0.3 ")
plot(reg_temp_high, pch = 16, col = "blue",
main = "FPReg not logged eyetr >= 0.3")
print("---------------------Pearson---------------------------")
[1] "---------------------Pearson---------------------------"
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$estimate, method='Pearson')
cor
0.2454
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$p.value, method='Pearson')
[1] 0.000000000000000000000000000000000004739
print(cor.test(reg_df_all_drop0s$eyetr_value, reg_df_all_drop0s$motr_value)$estimate, method='Pearson')
cor
0.3225
print(cor.test(reg_df_all_drop0s$eyetr_value, reg_df_all_drop0s$motr_value)$p.value, method='Pearson')
[1] 0.00000000000000000000001143
print("---------------------Spearman---------------------------")
[1] "---------------------Spearman---------------------------"
# BUG(review): method='Spearman' here (and in the next three calls) is an
# argument to print(), not to cor.test(), so this is still the default
# Pearson estimate -- note it matches the Pearson value 0.2454 printed above.
# The intended call is cor.test(x, y, method = "spearman").
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$estimate, method='Spearman')
cor
0.2454
print(cor.test(reg_df_all$eyetr_value, reg_df_all$motr_value)$p.value, method='Spearman')
[1] 0.000000000000000000000000000000000004739
print(cor.test(reg_df_all_drop0s$eyetr_value, reg_df_all_drop0s$motr_value)$estimate, method='Spearman')
cor
0.3225
print(cor.test(reg_df_all_drop0s$eyetr_value, reg_df_all_drop0s$motr_value)$p.value, method='Spearman')
[1] 0.00000000000000000000001143
print("---------------------Scaled lm ---------------------------")
[1] "---------------------Scaled lm ---------------------------"
m.scaled = lm(scale(motr_value) ~ 1 + scale(eyetr_value), data = reg_df_all)
summary(m.scaled)
Call:
lm(formula = scale(motr_value) ~ 1 + scale(eyetr_value), data = reg_df_all)
Residuals:
Min 1Q Median 3Q Max
-1.74 -0.57 -0.40 0.44 6.84
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.00000000000000059 0.01926560159151626 0.0 1
scale(eyetr_value) 0.24538623826853576 0.01926940563965124 12.7 <0.0000000000000002 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.97 on 2531 degrees of freedom
Multiple R-squared: 0.0602, Adjusted R-squared: 0.0598
F-statistic: 162 on 1 and 2531 DF, p-value: <0.0000000000000002
coef(m.scaled)[2]
scale(eyetr_value)
0.2454
summary(m.scaled)$coefficients[2, 4]
[1] 0.000000000000000000000000000000000004739
m.scaled_drop0s = lm(scale(motr_value) ~ 1 + scale(eyetr_value), data = reg_df_all_drop0s)
summary(m.scaled_drop0s)
Call:
lm(formula = scale(motr_value) ~ 1 + scale(eyetr_value), data = reg_df_all_drop0s)
Residuals:
Min 1Q Median 3Q Max
-1.779 -0.598 -0.214 0.368 5.549
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.00000000000000176 0.03125802679731712 0.0 1
scale(eyetr_value) 0.32254171528306036 0.03127506578838755 10.3 <0.0000000000000002 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.947 on 916 degrees of freedom
Multiple R-squared: 0.104, Adjusted R-squared: 0.103
F-statistic: 106 on 1 and 916 DF, p-value: <0.0000000000000002
# m.scaled_drop0s_inv = lm(scale(eyetr_value) ~ 1 + scale(motr_value), data = reg_df_all_drop0s)
# summary(m.scaled_drop0s_inv)
# ------- fit bivariate model: FPReg eyetr >= 0.3, zeros dropped ----------
# NOTE(review): this reuses/overwrites `reg_data` and `fit_reg` from the
# all-data fit earlier in the script.
reg_data = list(x=reg_temp_high_drop0, N=nrow(reg_temp_high_drop0))
fit_reg = stan(
# file="stan_models/bivariate_beta_correlation_reg.stan",
file = "stan_models/bivariate_normal_reg.stan",
data=reg_data,
iter=4000,
chains=4,
cores=4,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model; blank the compiled DSO so the .rds stays portable
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("motr_eyetr_FPReg_cor_03-1_drop0s.rds"))
# ---- brms linear model: scaled MoTR ~ scaled eye-tracking, all FPReg data ----
# NOTE(review): `exp` shadows base::exp() for the rest of the session -- rename
# (e.g. exp_name) if exp() is needed later.
exp = "motr_eyetr_FPReg"
# Weakly-informative normal(0, 10) priors on intercept, slope, and sigma
priors_cor_motr_eyetr <- c(
prior(normal(0, 10), class = Intercept),
prior(normal(0, 10), class = b),
prior(normal(0, 10), class = sigma)
)
model = brms::brm( scale(motr_value) ~ 1 + scale(eyetr_value),
data = reg_df_all,
chains = 4,
family = gaussian(),
file = paste0("./temp/", exp, "_lm_with0s"), # cached fit; delete file to refit
cores = 4,
backend = "cmdstanr",
prior = priors_cor_motr_eyetr,
warmup = 2000,
iter = 4000,
# control = list(adapt_delta = 0.9),
save_pars = save_pars(all = TRUE)
)
# BUG(review): `motr_eyetr_cor` is not defined in this section -- the fit above
# is stored in `model`; presumably these calls should use `model` (or the
# object existed under that name earlier in the session). Confirm before rerun.
posterior_samples_motr_eyetr_cor <- as.data.frame(motr_eyetr_cor)
posterior_samples_motr_eyetr_cor
# Get all the draws of the slope coefficient b_scaleeyetr_value
cor_draws <- posterior_samples_motr_eyetr_cor$b_scaleeyetr_value %>% data.frame()
View(cor_draws)
pp_check(motr_eyetr_cor, type = "dens_overlay")
Using 10 posterior draws for ppc type 'dens_overlay' by default.
pp_check(motr_eyetr_cor, type = "scatter")
Using 10 posterior draws for ppc type 'scatter' by default.
# ======================================================= RANK =======================================================
print("Rank First Pass Regression Prob. all data")
[1] "Rank First Pass Regression Prob. all data"
# Rank-transform both measures (ties share the max rank); with and without 0s
reg_df_rank = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
# filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_rank = rank(eyetr_value, ties.method = "max"),
motr_rank = rank(motr_value, ties.method = "max"))
reg_df_rank_drop0s = provo_df %>% filter(metric == "FPReg") %>%
spread(measure, value) %>%
filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr_rank = rank(eyetr_value, ties.method = "max"),
motr_rank = rank(motr_value, ties.method = "max"))
View(reg_df_rank)
print(cor.test(reg_df_rank$eyetr_rank, reg_df_rank$motr_rank)$estimate)
cor
0.151
print(cor.test(reg_df_rank$eyetr_rank, reg_df_rank$motr_rank)$p.value)
[1] 0.00000000000002169
print(cor.test(reg_df_rank_drop0s$eyetr_rank, reg_df_rank_drop0s$motr_rank)$estimate)
cor
0.1491
print(cor.test(reg_df_rank_drop0s$eyetr_rank, reg_df_rank_drop0s$motr_rank)$p.value)
[1] 0.00000568
# Density of the rank columns (14:15 is positional -- depends on column layout)
reg_df_rank %>%
gather(measure, value, 14:15) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
reg_df_rank_drop0s %>%
gather(measure, value, 14:15) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
# Rank matrices for the Stan fit and base-graphics scatter plots
reg_temp_rank <- reg_df_rank[c("eyetr_rank", "motr_rank")] %>% data.matrix()
reg_temp_rank_drop0s <- reg_df_rank_drop0s[c("eyetr_rank", "motr_rank")] %>% data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Scatter the ranked data, with and without the zero rows
plot(reg_temp_rank, pch = 16, col = "blue",
main = "FPReg ranked")
plot(reg_temp_rank_drop0s, pch = 16, col = "blue",
main = "FPReg ranked drop0s ")
NA
NA
# Fit the bivariate correlation model to the ranked (zeros included) data.
# NOTE(review): cores=8 with chains=4 -- only 4 cores are used.
reg_rank_data = list(x=reg_temp_rank, N=nrow(reg_temp_rank))
fit_reg_rank = stan(
file="stan_models/bivariate_correlation.stan",
data=reg_rank_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model; blank the compiled DSO so the .rds stays portable
fit_reg_rank@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg_rank, file = paste0("./temp/ranked_motr_eyetr_FPReg_cor.rds"))
print("Rank First Pass Regression Prob. all data")
[1] "Rank First Pass Regression Prob. all data"
# Split-half eye-tracking baseline: rank-correlate eyetr half 1 vs half 2.
# NOTE(review): despite the "all data" label printed above, this compares two
# eye-tracking halves (value_1 vs value_2), not MoTR vs eye-tracking.
# NOTE(review): rows 443-446 are dropped by magic row numbers -- document why.
ereg_df_rank = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
spread(measure, value) %>%
# filter(eyetr_value > 0, motr_value > 0) %>%
mutate(eyetr1_rank = rank(value_1, ties.method = "max"),
eyetr2_rank = rank(value_2, ties.method = "max"))
# Same comparison with rows where either half is 0 removed
ereg_df_rank_drop0s = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
spread(measure, value) %>%
filter(value_1 > 0, value_2 > 0) %>%
mutate(eyetr1_rank = rank(value_1, ties.method = "max"),
eyetr2_rank = rank(value_2, ties.method = "max"))
View(ereg_df_rank)
print(cor.test(ereg_df_rank$eyetr1_rank, ereg_df_rank$eyetr2_rank)$estimate)
cor
0.6001
print(cor.test(ereg_df_rank$eyetr1_rank, ereg_df_rank$eyetr2_rank)$p.value)
[1] 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004421
print(cor.test(ereg_df_rank_drop0s$eyetr1_rank, ereg_df_rank_drop0s$eyetr2_rank)$estimate)
cor
0.5845
print(cor.test(ereg_df_rank_drop0s$eyetr1_rank, ereg_df_rank_drop0s$eyetr2_rank)$p.value)
[1] 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000007256
# Density of the two split-half rank columns (7:8 is positional)
ereg_df_rank %>%
gather(measure, value, 7:8) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
ereg_df_rank_drop0s %>%
gather(measure, value, 7:8) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
# Rank matrices for plotting the eyetr half-1 vs half-2 comparison
ereg_temp_rank <- ereg_df_rank[c("eyetr1_rank", "eyetr2_rank")] %>% data.matrix()
ereg_temp_rank_drop0s <- ereg_df_rank_drop0s[c("eyetr1_rank", "eyetr2_rank")] %>% data.matrix()
# Set up the plotting area with two side-by-side plots
par(mfrow = c(1, 2))
# Scatter the ranked split halves, with and without zero rows
plot(ereg_temp_rank, pch = 16, col = "blue",
main = "FPReg ranked")
plot(ereg_temp_rank_drop0s, pch = 16, col = "blue",
main = "FPReg ranked drop0s ")
# Reload the saved Stan fits for the six FPReg subsets plus the ranked fit.
# NOTE(review): "..._all_data_drop0s.rds" was written earlier in this script
# from reg_temp_all (zeros INCLUDED) -- confirm the file's contents match its
# drop-0s label before trusting fit_mreg_all_drop0.
fit_mreg_all = readRDS("./motr_eyetr_FPReg_cor_all_data.rds")
fit_mreg_all_drop0 = readRDS("./motr_eyetr_FPReg_cor_all_data_drop0s.rds")
fit_mreg_low = readRDS("./motr_eyetr_FPReg_cor_00-03.rds")
fit_mreg_low_drop0 = readRDS("./motr_eyetr_FPReg_cor_00-03_drop0s.rds")
fit_mreg_high = readRDS("./motr_eyetr_FPReg_cor_03-1.rds")
fit_mreg_high_drop0 = readRDS("./motr_eyetr_FPReg_cor_03-1_drop0s.rds")
fit_rank_all = readRDS("./temp/ranked_motr_eyetr_FPReg_cor.rds")
print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. all data --------------------------------------------"
print(fit_mreg_all)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.12 0.00 0.00 0.12 0.12 0.12 0.13 0.13 5811 1
mu[2] 0.03 0.00 0.00 0.02 0.03 0.03 0.03 0.03 3384 1
sigma[1] 0.08 0.00 0.00 0.07 0.07 0.08 0.08 0.08 3694 1
sigma[2] 0.05 0.00 0.00 0.05 0.05 0.05 0.05 0.06 3040 1
nu 2.42 0.00 0.14 2.16 2.33 2.42 2.52 2.71 3250 1
rho 0.16 0.00 0.02 0.11 0.14 0.16 0.17 0.20 8176 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 3693 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6348 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6348 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 3053 1
x_rand[1] 0.16 0.00 0.12 0.02 0.09 0.14 0.20 0.40 7811 1
x_rand[2] 0.07 0.00 0.08 0.00 0.03 0.05 0.09 0.25 7966 1
attempt 0.63 0.01 1.03 0.00 0.00 0.00 1.00 3.00 7919 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 7450.58 0.03 1.78 7446.22 7449.65 7450.92 7451.89 7452.98 3519 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 23:08:09 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. all data no 0s--------------------------------------------"
print(fit_mreg_all_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.15 0.00 0.00 0.14 0.15 0.15 0.15 0.16 8507 1
mu[2] 0.14 0.00 0.00 0.13 0.14 0.14 0.14 0.14 7923 1
sigma[1] 0.09 0.00 0.00 0.08 0.08 0.09 0.09 0.09 7186 1
sigma[2] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.06 6227 1
nu 2.78 0.00 0.23 2.37 2.62 2.77 2.93 3.26 7008 1
rho 0.18 0.00 0.04 0.11 0.16 0.18 0.21 0.26 9833 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 7173 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8599 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8599 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6234 1
x_rand[1] 0.18 0.00 0.13 0.02 0.10 0.16 0.22 0.43 7699 1
x_rand[2] 0.15 0.00 0.08 0.03 0.10 0.14 0.18 0.34 8175 1
attempt 0.16 0.00 0.43 0.00 0.00 0.00 0.00 1.00 7917 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 2566.44 0.03 1.75 2562.18 2565.53 2566.76 2567.72 2568.88 3487 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 23:18:11 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------"
print(fit_mreg_low)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.12 0.00 0.00 0.11 0.12 0.12 0.12 0.12 8566 1
mu[2] 0.03 0.00 0.00 0.03 0.03 0.03 0.04 0.04 4981 1
sigma[1] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.07 7280 1
sigma[2] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.07 4552 1
nu 5.54 0.01 0.51 4.62 5.19 5.51 5.86 6.62 4637 1
rho 0.10 0.00 0.02 0.06 0.09 0.10 0.12 0.15 8596 1
cov[1,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7299 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8259 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8259 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 4571 1
x_rand[1] 0.13 0.00 0.07 0.02 0.08 0.12 0.17 0.28 8277 1
x_rand[2] 0.07 0.00 0.06 0.00 0.03 0.06 0.10 0.21 7280 1
attempt 0.53 0.01 0.90 0.00 0.00 0.00 1.00 3.00 8006 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 7794.82 0.03 1.78 7790.50 7793.88 7795.16 7796.13 7797.26 3232 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 20:52:03 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.< 0.3 no 0s--------------------------------------------"
print(fit_mreg_low_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.13 0.00 0.00 0.13 0.13 0.13 0.13 0.14 10534 1
mu[2] 0.13 0.00 0.00 0.13 0.13 0.13 0.14 0.14 8839 1
sigma[1] 0.06 0.00 0.00 0.06 0.06 0.06 0.06 0.07 8534 1
sigma[2] 0.06 0.00 0.00 0.06 0.06 0.06 0.07 0.07 7122 1
nu 6.50 0.01 0.96 4.90 5.83 6.40 7.06 8.73 6790 1
rho 0.07 0.00 0.04 0.00 0.05 0.07 0.10 0.15 10356 1
cov[1,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 8526 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 10435 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 10435 1
cov[2,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 7123 1
x_rand[1] 0.14 0.00 0.07 0.02 0.09 0.13 0.18 0.28 8031 1
x_rand[2] 0.14 0.00 0.07 0.02 0.10 0.14 0.18 0.29 8006 1
attempt 0.08 0.00 0.30 0.00 0.00 0.00 0.00 1.00 7642 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 2737.21 0.03 1.76 2732.81 2736.27 2737.56 2738.50 2739.62 3992 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 20:30:02 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------"
print(fit_mreg_high)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.45 0.00 0.01 0.44 0.45 0.45 0.46 0.47 5045 1
mu[2] 0.15 0.00 0.01 0.13 0.15 0.15 0.16 0.18 4628 1
sigma[1] 0.11 0.00 0.01 0.10 0.11 0.11 0.12 0.12 5622 1
sigma[2] 0.15 0.00 0.01 0.13 0.15 0.15 0.16 0.17 4499 1
nu 25.71 0.18 12.83 9.59 16.53 22.89 31.70 58.03 5109 1
rho 0.41 0.00 0.06 0.30 0.37 0.41 0.45 0.51 7137 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 5625 1
cov[1,2] 0.01 0.00 0.00 0.00 0.01 0.01 0.01 0.01 4842 1
cov[2,1] 0.01 0.00 0.00 0.00 0.01 0.01 0.01 0.01 4842 1
cov[2,2] 0.02 0.00 0.00 0.02 0.02 0.02 0.03 0.03 4553 1
x_rand[1] 0.47 0.00 0.11 0.25 0.39 0.47 0.54 0.70 7964 1
x_rand[2] 0.20 0.00 0.13 0.01 0.10 0.19 0.28 0.49 7829 1
attempt 0.19 0.01 0.47 0.00 0.00 0.00 0.00 1.00 7881 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 579.26 0.03 1.75 575.10 578.35 579.56 580.56 581.68 3529 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 22:25:38 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.>= 0.3 no 0s--------------------------------------------"
print(fit_mreg_high_drop0)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.48 0.00 0.01 0.46 0.48 0.48 0.49 0.51 6409 1
mu[2] 0.27 0.00 0.01 0.25 0.26 0.27 0.28 0.30 6397 1
sigma[1] 0.12 0.00 0.01 0.11 0.12 0.12 0.13 0.14 6555 1
sigma[2] 0.16 0.00 0.01 0.14 0.15 0.16 0.17 0.18 6281 1
nu 32.85 0.16 15.27 11.84 21.65 30.12 40.80 69.54 8953 1
rho 0.51 0.00 0.07 0.37 0.47 0.52 0.56 0.64 6986 1
cov[1,1] 0.02 0.00 0.00 0.01 0.01 0.02 0.02 0.02 6491 1
cov[1,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 5145 1
cov[2,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 5145 1
cov[2,2] 0.03 0.00 0.00 0.02 0.02 0.03 0.03 0.03 6242 1
x_rand[1] 0.49 0.00 0.13 0.25 0.41 0.49 0.57 0.74 8129 1
x_rand[2] 0.29 0.00 0.15 0.04 0.18 0.28 0.38 0.60 7467 1
attempt 0.06 0.00 0.24 0.00 0.00 0.00 0.00 1.00 7913 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 300.28 0.03 1.75 296.05 299.35 300.60 301.58 302.70 3555 1
Samples were drawn using NUTS(diag_e) at Sat Aug 5 22:31:08 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression ranked all data with 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression ranked all data with 0s--------------------------------------------"
print(fit_rank_all)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 1208.92 0.12 11.63 1185.89 1201.15 1208.92 1216.73 1231.7 9625 1
mu[2] 1764.74 0.04 4.17 1756.59 1761.91 1764.73 1767.55 1773.0 11334 1
sigma[1] 760.33 0.08 8.54 744.01 754.53 760.21 766.08 777.2 10442 1
sigma[2] 284.51 0.03 3.04 278.58 282.45 284.49 286.55 290.4 10325 1
nu 94.06 0.24 23.62 55.71 77.00 91.34 108.22 148.9 9447 1
rho 0.16 0.00 0.02 0.12 0.14 0.16 0.17 0.2 9737 1
cov[1,1] 578171.63 127.33 12985.22 553549.20 569313.77 577913.10 586885.25 603978.7 10400 1
cov[1,2] 34006.07 45.52 4457.59 25228.03 31014.76 34023.27 36996.15 42647.8 9590 1
cov[2,1] 34006.07 45.52 4457.59 25228.03 31014.76 34023.27 36996.15 42647.8 9590 1
cov[2,2] 80956.56 17.04 1731.88 77608.45 79780.12 80933.51 82112.99 84353.1 10326 1
x_rand[1] 1300.25 7.59 683.23 138.65 791.58 1256.01 1761.41 2731.3 8098 1
x_rand[2] 1768.40 3.22 289.99 1196.92 1575.57 1768.07 1959.00 2350.4 8091 1
attempt 0.06 0.00 0.26 0.00 0.00 0.00 0.00 1.0 7886 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.0 NaN NaN
lp__ -73652.70 0.03 1.73 -73656.85 -73653.64 -73652.38 -73651.42 -73650.3 4295 1
Samples were drawn using NUTS(diag_e) at Wed Aug 9 15:40:36 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# # FPReg all data
# stan_trace(fit_mreg_all)
# stan_dens(fit_mreg_all, separate_chains = TRUE)
# stan_plot(fit_mreg_all)
# stan_trace(fit_mreg_all_drop0)
# stan_dens(fit_mreg_all_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_all_drop0)
# # FPReg < 0.3
# stan_trace(fit_mreg_low)
# stan_dens(fit_mreg_low, separate_chains = TRUE)
# stan_plot(fit_mreg_low)
#
# stan_trace(fit_mreg_low_drop0)
# stan_dens(fit_mreg_low_drop0, separate_chains = TRUE)
# stan_plot(fit_mreg_low_drop0)
# MCMC diagnostics for the FPReg >= 0.3 fits (trace, density, interval plots)
stan_trace(fit_mreg_high)
'pars' not specified. Showing first 10 parameters by default.
stan_dens(fit_mreg_high, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.
stan_plot(fit_mreg_high)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)
stan_trace(fit_mreg_high_drop0)
'pars' not specified. Showing first 10 parameters by default.
stan_dens(fit_mreg_high_drop0, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.
stan_plot(fit_mreg_high_drop0)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)
print('---------------------------- First Pass Regression all data--------------------------------------------')
[1] "---------------------------- First Pass Regression all data--------------------------------------------"
# Summarise the posterior of rho (MoTR vs eye-tracking, all FPReg data):
# posterior mean, 95% HPD interval, and 95% equal-tailed credible interval.
# Fixes: `mean` no longer shadows base::mean(); `hpd99` renamed to `hpd95`
# because the interval is computed with prob = 0.95, not 0.99.
rho_mreg_all = as.numeric(extract(fit_mreg_all, "rho")[[1]])
mean_rho = mean(rho_mreg_all)
crI = quantile(rho_mreg_all, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_mreg_all), prob = 0.95)
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.1574
HPD: [0.1139, 0.2017]
crI: [0.1134, 0.2014]
print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression all data no 0s--------------------------------------------"
# Summarise the posterior of rho (all FPReg data, zeros dropped):
# posterior mean, 95% HPD interval, and 95% equal-tailed credible interval.
# Fixes: `mean` no longer shadows base::mean(); `hpd99` renamed to `hpd95`
# because the interval is computed with prob = 0.95, not 0.99.
rho_mreg_all_drop0 = as.numeric(extract(fit_mreg_all_drop0, "rho")[[1]])
mean_rho = mean(rho_mreg_all_drop0)
crI = quantile(rho_mreg_all_drop0, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_mreg_all_drop0), prob = 0.95)
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.1843
HPD: [0.1106, 0.2573]
crI: [0.1114, 0.2581]
print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3--------------------------------------------"
# Summarise the posterior of rho (FPReg, eyetr < 0.3 slice):
# posterior mean, 95% HPD interval, and 95% equal-tailed credible interval.
# Fixes: `mean` no longer shadows base::mean(); `hpd99` renamed to `hpd95`
# because the interval is computed with prob = 0.95, not 0.99.
rho_mreg_low = as.numeric(extract(fit_mreg_low, "rho")[[1]])
mean_rho = mean(rho_mreg_low)
crI = quantile(rho_mreg_low, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_mreg_low), prob = 0.95)
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.103
HPD: [0.05775, 0.1468]
crI: [0.05805, 0.1475]
print('---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3 no 0s--------------------------------------------"
# Summarise the posterior of rho (FPReg, eyetr < 0.3, zeros dropped):
# posterior mean, 95% HPD interval, and 95% equal-tailed credible interval.
# Fixes: `mean` no longer shadows base::mean(); `hpd99` renamed to `hpd95`
# because the interval is computed with prob = 0.95, not 0.99.
rho_mreg_low_drop0 = as.numeric(extract(fit_mreg_low_drop0, "rho")[[1]])
mean_rho = mean(rho_mreg_low_drop0)
crI = quantile(rho_mreg_low_drop0, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_mreg_low_drop0), prob = 0.95)
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.07362
HPD: [-0.000616, 0.1514]
crI: [-0.003759, 0.1491]
print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3--------------------------------------------"
# Summarise the posterior of rho (FPReg, eyetr >= 0.3 slice):
# posterior mean, 95% HPD interval, and 95% equal-tailed credible interval.
# Fixes: `mean` no longer shadows base::mean(); `hpd99` renamed to `hpd95`
# because the interval is computed with prob = 0.95, not 0.99.
rho_mreg_high = as.numeric(extract(fit_mreg_high, "rho")[[1]])
mean_rho = mean(rho_mreg_high)
crI = quantile(rho_mreg_high, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_mreg_high), prob = 0.95)
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.4102
HPD: [0.3001, 0.5155]
crI: [0.2992, 0.5146]
print('---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3 no 0s--------------------------------------------"
# Summarise the posterior of rho (FPReg, eyetr >= 0.3, zeros dropped):
# posterior mean, 95% HPD interval, and 95% equal-tailed credible interval.
# Fixes: `mean` no longer shadows base::mean(); `hpd99` renamed to `hpd95`
# because the interval is computed with prob = 0.95, not 0.99.
rho_mreg_high_drop0 = as.numeric(extract(fit_mreg_high_drop0, "rho")[[1]])
mean_rho = mean(rho_mreg_high_drop0)
crI = quantile(rho_mreg_high_drop0, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_mreg_high_drop0), prob = 0.95)
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.514
HPD: [0.3797, 0.6456]
crI: [0.3728, 0.6412]
print('---------------------------- First Pass Regression ranked all with 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression ranked all with 0s--------------------------------------------"
# Summarise the posterior of rho (ranked FPReg, all data with zeros):
# posterior mean, 95% HPD interval, and 95% equal-tailed credible interval.
# Fixes: `mean` no longer shadows base::mean(); `hpd99` renamed to `hpd95`
# because the interval is computed with prob = 0.95, not 0.99.
rho_mreg_fit_rank_all = as.numeric(extract(fit_rank_all, "rho")[[1]])
mean_rho = mean(rho_mreg_fit_rank_all)
crI = quantile(rho_mreg_fit_rank_all, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_mreg_fit_rank_all), prob = 0.95)
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[, "lower"], ", ", hpd95[, "upper"], "]",
    "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep = "")
Mean: 0.1572
HPD: [0.1164, 0.1953]
crI: [0.1169, 0.196]
print('---------------------------- First Pass Regression all data --------------------------------------------')
[1] "---------------------------- First Pass Regression all data --------------------------------------------"
# Posterior-predictive plot: simulated pairs from the all-data fit (black)
# vs observed all-data pairs (red)
mallreg_rand <- extract(fit_mreg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg") # 'type = "n"' makes sure the plot is blank
# add points for x_rand (posterior draws)
points(mallreg_rand[,1], mallreg_rand[,2], col = "black", pch = 16)
# overlay the observed data in red
points(reg_temp_all, pch=16, col="red")
# add posterior-draw ellipses (car::dataEllipse)
dataEllipse(mallreg_rand, levels = c(0.5, 0.75), fill=T, plot.points = F, col="orange")
dataEllipse(mallreg_rand, levels = c(0.95, 0.99), fill=T, plot.points = F, col="blue")
print('---------------------------- First Pass Regression all data no 0s--------------------------------------------')
[1] "---------------------------- First Pass Regression all data no 0s--------------------------------------------"
# Posterior predictive check for FPReg (all data, zeros dropped).
mallreg_rand_drop0 <- extract(fit_mreg_all_drop0, "x_rand")[[1]]
# Blank canvas with fixed limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
# Posterior predictive draws
points(mallreg_rand_drop0[,1], mallreg_rand_drop0[,2], col = "black", pch = 16)
# NOTE(review): observed points come from reg_temp_all, not a drop0 version —
# confirm this is intended for the no-zeros panel.
points(reg_temp_all, pch=16, col="red")
# Concentration ellipses
dataEllipse(mallreg_rand_drop0, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(mallreg_rand_drop0, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3 --------------------------------------------"
# Posterior predictive check for FPReg, low-correlation subset (< 0.3).
mlowreg_rand <- extract(fit_mreg_low, "x_rand")[[1]]
# Blank canvas with fixed limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
# Posterior predictive draws
points(mlowreg_rand[,1], mlowreg_rand[,2], col = "black", pch = 16)
# Observed data
points(reg_temp_low, pch=16, col="red")
# Concentration ellipses
dataEllipse(mlowreg_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(mlowreg_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- First Pass Regression < 0.3 no 0s --------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3 no 0s --------------------------------------------"
# Posterior predictive check for FPReg, low-correlation subset, zeros dropped.
mlowreg_rand_drop0 <- extract(fit_mreg_low_drop0, "x_rand")[[1]]
# Blank canvas with fixed limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
# Posterior predictive draws
points(mlowreg_rand_drop0[,1], mlowreg_rand_drop0[,2], col = "black", pch = 16)
# Observed data
points(reg_temp_low_drop0, pch=16, col="red")
# Concentration ellipses
dataEllipse(mlowreg_rand_drop0, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(mlowreg_rand_drop0, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3 --------------------------------------------"
# Posterior predictive check for FPReg, high-correlation subset (>= 0.3).
mhighreg_rand_samples <- extract(fit_mreg_high, "x_rand")[[1]]
# Thin to 900 draws so the scatter stays readable
# (sample(n, k) instead of the sample(1:n, k) anti-pattern)
selected_indices <- sample(nrow(mhighreg_rand_samples), 900)
mhighreg_rand <- mhighreg_rand_samples[selected_indices, ]
# Blank canvas with fixed limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
# Posterior predictive draws
points(mhighreg_rand[,1], mhighreg_rand[,2], col = "black", pch = 16)
# Observed data
points(reg_temp_high, pch=16, col="red")
# Concentration ellipses
dataEllipse(mhighreg_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(mhighreg_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- First Pass Regression >= 0.3 no 0s --------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3 no 0s --------------------------------------------"
# Posterior predictive check for FPReg, high-correlation subset, zeros dropped.
mhighreg_rand_drop0_samples <- extract(fit_mreg_high_drop0, "x_rand")[[1]]
# Thin to 900 draws for readability
selected_indices <- sample(nrow(mhighreg_rand_drop0_samples), 900)
mhighreg_rand_drop0 <- mhighreg_rand_drop0_samples[selected_indices, ]
# Blank canvas with fixed limits
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
# Posterior predictive draws
points(mhighreg_rand_drop0[,1], mhighreg_rand_drop0[,2], col = "black", pch = 16)
# Observed data
points(reg_temp_high_drop0, pch=16, col="red")
# Concentration ellipses
dataEllipse(mhighreg_rand_drop0, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(mhighreg_rand_drop0, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- First Pass Regression ranked all data with 0s --------------------------------------------')
[1] "---------------------------- First Pass Regression ranked all data with 0s --------------------------------------------"
# Posterior predictive check for FPReg, rank-transformed, all data with zeros.
mrankreg_rand_samples <- extract(fit_rank_all, "x_rand")[[1]]
# Thin to 900 draws for readability
selected_indices <- sample(nrow(mrankreg_rand_samples), 900)
mrankreg_rand <- mrankreg_rand_samples[selected_indices, ]
# Blank canvas; rank scale runs to 3500
plot(1, 1, xlim=c(0, 3500), ylim=c(0, 3500), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
# Posterior predictive draws
points(mrankreg_rand[,1], mrankreg_rand[,2], col = "black", pch = 16)
# Observed ranked pairs
points(reg_temp_rank, pch=16, col="red")
# Concentration ellipses
dataEllipse(mrankreg_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(mrankreg_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print("Gaze Duration")
[1] "Gaze Duration"
# View(provo_eyetr_grouped_df)
# Gaze duration: wide table with one column per `measure` (value_1/value_2),
# i.e. the two eye-tracking series to be correlated.
egd_df = provo_eyetr_grouped_df %>% filter(metric == "gaze_duration") %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1),
eyetr_value_2 = pmax(value_2, 1)
)
# Frequentist Pearson correlation as a quick sanity check on the pairing
print(cor.test(egd_df$eyetr_value_1, egd_df$eyetr_value_2)$estimate)
cor
0.9146
# View(egd_df)
# Density of the two smoothed gaze-duration columns (positions 5:6)
egd_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
# Matrix input for the Stan bivariate correlation model
egd_temp <- egd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
data.matrix()
# Two side-by-side base plots
par(mfrow = c(1, 2))
# Raw (not log-transformed) scatter of the two measures
plot(egd_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")
# Fit the bivariate correlation model to the gaze-duration pairs
egd_data = list(x=egd_temp, N=nrow(egd_temp))
fit_egd = stan(
file="stan_models/bivariate_correlation.stan",
data=egd_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Drop the compiled DSO so the saved object is small/portable,
# then persist the fit (single-argument paste0 removed — plain string)
fit_egd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egd, file = "eyetr_eyetr_gaze_duration_cor.rds")
print("Go Past Time")
[1] "Go Past Time"
# Go-past time: same wide layout as the gaze-duration table above
egpt_df = provo_eyetr_grouped_df %>% filter(metric == "go_past_time") %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1),
eyetr_value_2 = pmax(value_2, 1)
)
# Frequentist Pearson correlation as a quick sanity check
print(cor.test(egpt_df$eyetr_value_1, egpt_df$eyetr_value_2)$estimate)
cor
0.9134
# View(egd_df)
# Density of the two smoothed go-past-time columns (positions 5:6)
egpt_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
# Matrix input for the Stan bivariate correlation model
egpt_temp <- egpt_df[c("eyetr_value_1", "eyetr_value_2")] %>%
data.matrix()
# Two side-by-side base plots
par(mfrow = c(1, 2))
# Raw (not log-transformed) scatter of the two measures
plot(egpt_temp, pch = 16, col = "blue",
main = "Not Log-Transformed")
# Fit the bivariate correlation model to the go-past-time pairs
egpt_data = list(x=egpt_temp, N=nrow(egpt_temp))
fit_egpt = stan(
file="stan_models/bivariate_correlation.stan",
data=egpt_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Drop the compiled DSO, then persist (single-argument paste0 removed)
fit_egpt@stanmodel@dso <- new("cxxdso")
saveRDS(fit_egpt, file = "eyetr_eyetr_go_past_time_cor.rds")
print("Total Duration")
[1] "Total Duration"
# Total duration: same wide layout as the tables above
etd_df = provo_eyetr_grouped_df %>% filter(metric == "total_duration") %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value, na.rm = TRUE), .groups = 'drop') %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1),
eyetr_value_2 = pmax(value_2, 1)
)
# Frequentist Pearson correlation as a quick sanity check
print(cor.test(etd_df$eyetr_value_1, etd_df$eyetr_value_2)$estimate)
cor
0.9272
# View(egd_df)
# Density of the two smoothed total-duration columns (positions 5:6)
etd_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
# Matrix input for the Stan bivariate correlation model
etd_temp <- etd_df[c("eyetr_value_1", "eyetr_value_2")] %>%
data.matrix()
# Two side-by-side base plots
par(mfrow = c(1, 2))
# Raw (not log-transformed) scatter of the two measures
plot(etd_temp, pch = 16, col = "blue",
main = "Total Duration Not Log-Transformed")
# Fit the bivariate correlation model to the total-duration pairs.
# FIX: removed the trailing comma after seed=444 — with the following two
# arguments commented out it left an empty argument, which errors in R.
etd_data = list(x=etd_temp, N=nrow(etd_temp))
fit_etd = stan(
file="stan_models/bivariate_correlation.stan",
data=etd_data,
iter=4000,
chains=4,
cores=8,
seed=444
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Drop the compiled DSO, then persist (single-argument paste0 removed)
fit_etd@stanmodel@dso <- new("cxxdso")
saveRDS(fit_etd, file = "eyetr_eyetr_total_duration_cor.rds")
# Section banner (typo fixed: "Fisrt" -> "First")
print("First Pass Regression Prob.")
[1] "First Pass Regression Prob."
# First-pass regression probability: wide table of the two eye-tracking series.
ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
# NOTE(review): hard-coded row positions dropped here — fragile if the
# upstream data or its ordering changes; confirm why rows 443-446 are removed.
filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1e-5),
eyetr_value_2 = pmax(value_2, 1e-5)
)
# Frequentist Pearson correlation as a quick sanity check
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)
cor
0.741
# View(egd_df)
# Density of the two smoothed FPReg columns (positions 5:6)
ereg_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
# Matrix input for the Stan model; NAs removed first
ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
drop_na() %>%
data.matrix()
# Two side-by-side base plots
par(mfrow = c(1, 2))
# Raw scatter of the two measures
plot(ereg_temp, pch = 16, col = "blue",
main = "FPReg Not Log-Transformed")
# -------fit model FPReg ----------
# View(ereg_temp)
ereg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_ereg = stan(
file="stan_models/bivariate_normal_reg.stan",
data=ereg_data,
iter=4000,
chains=4,
cores=8,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Drop the compiled DSO, then persist.
# FIX: filename was "eyetr_eyetr_FPReg_cor5.rds" but the later readRDS()
# expects "eyetr_eyetr_FPReg_cor.rds" — aligned to the loaded name.
fit_ereg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_ereg, file = "eyetr_eyetr_FPReg_cor.rds")
# Reload the saved eyetr-vs-eyetr correlation fits from disk
fit_egd = readRDS("./eyetr_eyetr_gaze_duration_cor.rds")
fit_egpt = readRDS("./eyetr_eyetr_go_past_time_cor.rds")
fit_etd = readRDS("./eyetr_eyetr_total_duration_cor.rds")
fit_ereg = readRDS("./eyetr_eyetr_FPReg_cor.rds")
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
print(fit_egd)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 128.59 0.02 1.11 126.37 127.86 128.61 129.35 130.72 4725 1
mu[2] 118.86 0.01 0.98 116.88 118.20 118.87 119.51 120.76 4981 1
sigma[1] 80.51 0.01 0.90 78.74 79.90 80.52 81.12 82.25 4570 1
sigma[2] 71.27 0.01 0.79 69.71 70.73 71.27 71.80 72.81 4737 1
nu 25.93 0.07 4.93 18.41 22.47 25.17 28.62 37.53 5323 1
rho 0.93 0.00 0.00 0.93 0.93 0.93 0.93 0.94 5350 1
cov[1,1] 6483.12 2.15 145.27 6200.11 6384.78 6483.59 6579.87 6765.15 4571 1
cov[1,2] 5343.14 1.82 117.99 5110.89 5262.31 5342.62 5422.39 5572.98 4180 1
cov[2,1] 5343.14 1.82 117.99 5110.89 5262.31 5342.62 5422.39 5572.98 4180 1
cov[2,2] 5079.72 1.65 113.15 4859.11 5002.72 5079.35 5155.69 5301.71 4730 1
x_rand[1] 142.08 0.85 72.88 19.00 88.95 136.40 189.59 301.58 7381 1
x_rand[2] 130.48 0.76 65.14 20.87 82.14 126.27 171.93 269.53 7385 1
attempt 0.07 0.00 0.28 0.00 0.00 0.00 0.00 1.00 8017 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -53400.86 0.03 1.74 -53405.16 -53401.77 -53400.53 -53399.58 -53398.45 3567 1
Samples were drawn using NUTS(diag_e) at Sat Jul 22 21:28:08 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
print(fit_egpt)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 151.87 0.02 1.54 148.90 150.83 151.88 152.91 154.87 4316 1
mu[2] 139.50 0.02 1.35 136.85 138.58 139.50 140.40 142.16 4309 1
sigma[1] 104.23 0.02 1.38 101.57 103.29 104.23 105.15 106.96 4517 1
sigma[2] 91.29 0.02 1.18 89.01 90.49 91.28 92.10 93.67 4238 1
nu 9.84 0.01 0.97 8.17 9.16 9.77 10.43 12.02 4294 1
rho 0.93 0.00 0.00 0.93 0.93 0.93 0.94 0.94 5682 1
cov[1,1] 10866.34 4.27 286.82 10316.57 10668.36 10863.38 11057.52 11440.23 4514 1
cov[1,2] 8897.70 3.58 230.96 8458.01 8739.41 8893.80 9052.34 9359.86 4154 1
cov[2,1] 8897.70 3.58 230.96 8458.01 8739.41 8893.80 9052.34 9359.86 4154 1
cov[2,2] 8335.83 3.32 215.96 7923.28 8187.73 8331.15 8483.17 8774.46 4239 1
x_rand[1] 171.31 1.07 96.66 19.27 101.02 159.61 228.57 389.14 8093 1
x_rand[2] 156.09 0.95 85.13 18.99 93.98 147.13 208.02 344.02 7994 1
attempt 0.12 0.00 0.37 0.00 0.00 0.00 0.00 1.00 7345 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -55541.43 0.03 1.77 -55545.72 -55542.36 -55541.11 -55540.13 -55539.02 3362 1
Samples were drawn using NUTS(diag_e) at Sat Jul 22 21:52:18 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
print(fit_etd)
Inference for Stan model: bivariate_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 182.46 0.02 1.40 179.71 181.51 182.43 183.42 185.23 4475 1
mu[2] 163.16 0.02 1.24 160.74 162.32 163.15 164.00 165.56 4552 1
sigma[1] 102.05 0.02 1.18 99.76 101.25 102.05 102.84 104.35 4659 1
sigma[2] 90.80 0.01 1.04 88.77 90.11 90.80 91.49 92.86 4859 1
nu 17.08 0.04 2.46 13.14 15.32 16.80 18.44 22.84 4836 1
rho 0.94 0.00 0.00 0.94 0.94 0.94 0.94 0.95 4836 1
cov[1,1] 10414.71 3.51 240.12 9951.10 10251.81 10414.32 10575.72 10888.84 4667 1
cov[1,2] 8723.92 2.96 197.89 8344.44 8591.58 8725.13 8854.22 9120.77 4456 1
cov[2,1] 8723.92 2.96 197.89 8344.44 8591.58 8725.13 8854.22 9120.77 4456 1
cov[2,2] 8245.86 2.71 189.17 7879.76 8119.40 8245.31 8370.73 8622.71 4862 1
x_rand[1] 197.26 1.10 96.53 32.70 128.21 189.92 257.81 404.81 7760 1
x_rand[2] 176.38 0.99 86.54 30.04 113.27 170.41 230.58 357.63 7701 1
attempt 0.06 0.00 0.25 0.00 0.00 0.00 0.00 1.00 7873 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -55910.26 0.03 1.76 -55914.61 -55911.19 -55909.93 -55908.98 -55907.84 3286 1
Samples were drawn using NUTS(diag_e) at Sat Jul 22 22:21:40 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.--------------------------------------------"
print(fit_ereg)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.11 0.00 0.00 0.10 0.10 0.11 0.11 0.11 7352 1
mu[2] 0.11 0.00 0.00 0.10 0.11 0.11 0.11 0.11 6993 1
sigma[1] 0.11 0.00 0.00 0.10 0.11 0.11 0.11 0.11 5584 1
sigma[2] 0.10 0.00 0.00 0.10 0.10 0.10 0.10 0.11 5441 1
nu 3.16 0.00 0.13 2.92 3.07 3.16 3.24 3.42 6221 1
rho 0.73 0.00 0.01 0.71 0.73 0.73 0.74 0.76 7209 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 5581 1
cov[1,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 4877 1
cov[2,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 4877 1
cov[2,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 5442 1
x_rand[1] 0.17 0.00 0.14 0.01 0.08 0.14 0.22 0.47 7543 1
x_rand[2] 0.17 0.00 0.14 0.01 0.08 0.14 0.21 0.46 8201 1
attempt 0.37 0.01 0.70 0.00 0.00 0.00 1.00 2.00 7941 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 11696.50 0.03 1.76 11692.31 11695.54 11696.82 11697.81 11698.90 3562 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 01:21:18 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# MCMC diagnostics for each fit: traceplots, per-chain densities, and
# interval plots, in the order gaze duration, go-past time, total
# duration, FPReg. (Add pars=c("rho", "mu", "sigma", "nu") to any call
# to restrict the parameters shown.)
for (fit in list(fit_egd, fit_egpt, fit_etd, fit_ereg)) {
  print(stan_trace(fit))
  print(stan_dens(fit, separate_chains = TRUE))
  print(stan_plot(fit))
}
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
# Posterior summary of rho: gaze duration (eyetr vs eyetr)
rho_egd = as.numeric(extract(fit_egd, "rho")[[1]])
# post_mean instead of `mean` to avoid shadowing base::mean
post_mean = mean(rho_egd)
crI = quantile(rho_egd, c(.025, .975))
# 95% HPD interval (renamed from hpd99 — prob is 0.95)
hpd95 = HPDinterval(as.mcmc(rho_egd), prob=0.95)
cat("Mean: ", post_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.9311
HPD: [0.9263, 0.9359]
crI: [0.9262, 0.9358]
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
# Posterior summary of rho: go-past time (eyetr vs eyetr)
rho_egpt = as.numeric(extract(fit_egpt, "rho")[[1]])
# post_mean instead of `mean` to avoid shadowing base::mean
post_mean = mean(rho_egpt)
crI = quantile(rho_egpt, c(.025, .975))
# 95% HPD interval (renamed from hpd99 — prob is 0.95)
hpd95 = HPDinterval(as.mcmc(rho_egpt), prob=0.95)
cat("Mean: ", post_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.9349
HPD: [0.9297, 0.9395]
crI: [0.9298, 0.9397]
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
# Posterior summary of rho: total duration (eyetr vs eyetr)
rho_etd = as.numeric(extract(fit_etd, "rho")[[1]])
# post_mean instead of `mean` to avoid shadowing base::mean
post_mean = mean(rho_etd)
crI = quantile(rho_etd, c(.025, .975))
# 95% HPD interval (renamed from hpd99 — prob is 0.95)
hpd95 = HPDinterval(as.mcmc(rho_etd), prob=0.95)
cat("Mean: ", post_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]\n")
Mean: 0.9414
HPD: [0.9372, 0.9457]
crI: [0.937, 0.9456]
print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
# Posterior summary of rho: first-pass regression (eyetr vs eyetr)
rho_ereg = as.numeric(extract(fit_ereg, "rho")[[1]])
# post_mean instead of `mean` to avoid shadowing base::mean
post_mean = mean(rho_ereg)
crI = quantile(rho_ereg, c(.025, .975))
# 95% HPD interval (renamed from hpd99 — prob is 0.95)
hpd95 = HPDinterval(as.mcmc(rho_ereg), prob=0.95)
cat("Mean: ", post_mean, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", sep="", "\ncrI: [", crI[1], ", ", crI[2], "]")
Mean: 0.7339
HPD: [0.7128, 0.7557]
crI: [0.7119, 0.755]
print('---------------------------- Gaze Duration--------------------------------------------')
[1] "---------------------------- Gaze Duration--------------------------------------------"
# Posterior predictive check: gaze duration (eyetr vs eyetr)
egd_rand <- extract(fit_egd, "x_rand")[[1]]
# Blank canvas with fixed limits
plot(1, 1, xlim=c(0, 400), ylim=c(0, 700), type="n",
xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Gaze Duration")
# Posterior predictive draws
points(egd_rand[,1], egd_rand[,2], col = "black", pch = 16)
# Observed data
points(egd_temp, pch=16, col="red")
# Concentration ellipses (TRUE/FALSE instead of T/F)
dataEllipse(egd_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(egd_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- Go Past Time--------------------------------------------')
[1] "---------------------------- Go Past Time--------------------------------------------"
# Posterior predictive check: go-past time (eyetr vs eyetr)
egpt_rand <- extract(fit_egpt, "x_rand")[[1]]
# Blank canvas with fixed limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Go Past Time")
# Posterior predictive draws
points(egpt_rand[,1], egpt_rand[,2], col = "black", pch = 16)
# Observed data
points(egpt_temp, pch=16, col="red")
# Concentration ellipses
dataEllipse(egpt_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(egpt_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- Total Duration--------------------------------------------')
[1] "---------------------------- Total Duration--------------------------------------------"
# Posterior predictive check: total duration (eyetr vs eyetr)
etd_rand <- extract(fit_etd, "x_rand")[[1]]
# Blank canvas with fixed limits
plot(1, 1, xlim=c(0, 800), ylim=c(0, 1200), type="n",
xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "Total Duration")
# Posterior predictive draws
points(etd_rand[,1], etd_rand[,2], col = "black", pch = 16)
# Observed data
points(etd_temp, pch=16, col="red")
# Concentration ellipses
dataEllipse(etd_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(etd_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- First Pass Regression --------------------------------------------')
[1] "---------------------------- First Pass Regression --------------------------------------------"
# Posterior predictive check: first-pass regression (eyetr vs eyetr)
ereg_rand <- extract(fit_ereg, "x_rand")[[1]]
# Blank canvas with fixed limits (probabilities, so [0, 1])
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value 1", ylab = "Eye tracking value 2", main = "First Pass Regression")
# Posterior predictive draws
points(ereg_rand[,1], ereg_rand[,2], col = "black", pch = 16)
# Observed data
points(ereg_temp, pch=16, col="red")
# Concentration ellipses
dataEllipse(ereg_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(ereg_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print("Log Frequency")
[1] "Log Frequency"
# Gaze-duration rows, wide by measure; used for the statistics correlations below
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
# Frequentist correlation of MoTR RTs with log frequency
print(cor.test(stats_cor_df$motr_value, stats_cor_df$freq)$estimate)
cor
-0.7454
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$freq)$estimate)
cor
-0.8069
# View(stats_cor_df)
# Densities of the columns being correlated (positions 7 and 13)
stats_cor_df %>%
gather(measure, value, c(7, 13)) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
# Matrix inputs: MoTR RTs vs frequency, and eye-tracking RTs vs frequency
mfreq_temp <- stats_cor_df[c("motr_value", "freq")] %>%
data.matrix()
efreq_temp <- stats_cor_df[c("eyetr_value", "freq")] %>%
data.matrix()
# Two side-by-side base plots
par(mfrow = c(1, 2))
plot(mfreq_temp, pch = 16, col = "blue",
main = "MoTR RTs and Word Frequency")
plot(efreq_temp, pch = 16, col = "blue",
main = "EyeTR RTs and Word Frequency")
# Fit MoTR-vs-frequency correlation model.
# FIX: removed the trailing comma after seed=444 in both stan() calls —
# with the following arguments commented out it left an empty argument,
# which errors in R.
mfreq_data = list(x=mfreq_temp, N=nrow(mfreq_temp))
fit_mfreq = stan(
file="stan_models/stats_correlation.stan",
data=mfreq_data,
iter=4000,
chains=4,
cores=8,
seed=444
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Drop the compiled DSO, then persist (single-argument paste0 removed)
fit_mfreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mfreq, file = "motr_freq_cor.rds")
# Fit eye-tracking-vs-frequency correlation model
efreq_data = list(x=efreq_temp, N=nrow(efreq_temp))
fit_efreq = stan(
file="stan_models/stats_correlation.stan",
data=efreq_data,
iter=4000,
chains=4,
cores=8,
seed=444
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
fit_efreq@stanmodel@dso <- new("cxxdso")
saveRDS(fit_efreq, file = "eyetr_freq_cor.rds")
print("Length")
[1] "Length"
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$len)$estimate)
cor
0.8644
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$len)$estimate)
cor
0.8597
# View(stats_cor_df)
# Densities of the columns being correlated (positions 9 and 13)
stats_cor_df %>%
gather(measure, value, c(9, 13)) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
# Matrix inputs: MoTR RTs vs length, and eye-tracking RTs vs length
mlen_temp <- stats_cor_df[c("motr_value", "len")] %>%
data.matrix()
elen_temp <- stats_cor_df[c("eyetr_value", "len")] %>%
data.matrix()
# Two side-by-side base plots
par(mfrow = c(1, 2))
plot(mlen_temp, pch = 16, col = "blue",
main = "MoTR RTs and Word Length")
plot(elen_temp, pch = 16, col = "blue",
main = "EyeTR RTs and Word Length")
# Fit MoTR-vs-length correlation model.
# FIX: removed the trailing comma after seed=444 in both stan() calls —
# with the following arguments commented out it left an empty argument,
# which errors in R.
mlen_data = list(x=mlen_temp, N=nrow(mlen_temp))
fit_mlen = stan(
file="stan_models/stats_correlation_len_normal.stan",
data=mlen_data,
iter=4000,
chains=4,
cores=8,
seed=444
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Drop the compiled DSO, then persist.
# FIX: filename was "motr_len_cor2.rds" but the later readRDS() expects
# "motr_len_cor.rds" — aligned to the loaded name.
fit_mlen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_mlen, file = "motr_len_cor.rds")
# Fit eye-tracking-vs-length correlation model
elen_data = list(x=elen_temp, N=nrow(elen_temp))
fit_elen = stan(
file="stan_models/stats_correlation_len_normal.stan",
data=elen_data,
iter=4000,
chains=4,
cores=8,
seed=444
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
fit_elen@stanmodel@dso <- new("cxxdso")
saveRDS(fit_elen, file = "eyetr_len_cor.rds")
print("Surprisal")
[1] "Surprisal"
stats_cor_df = provo_df %>% filter(metric == "gaze_duration") %>% spread(measure, value)
print(cor.test(stats_cor_df$motr_value, stats_cor_df$surp)$estimate)
cor
0.4978
print(cor.test(stats_cor_df$eyetr_value, stats_cor_df$surp)$estimate)
cor
0.5683
# View(stats_cor_df)
# Densities of the columns being correlated (positions 8 and 13)
stats_cor_df %>%
gather(measure, value, c(8, 13)) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
# Matrix inputs: MoTR RTs vs surprisal, and eye-tracking RTs vs surprisal
msurp_temp <- stats_cor_df[c("motr_value", "surp")] %>%
data.matrix()
esurp_temp <- stats_cor_df[c("eyetr_value", "surp")] %>%
data.matrix()
# Two side-by-side base plots
par(mfrow = c(1, 2))
plot(msurp_temp, pch = 16, col = "blue",
main = "MoTR RTs and Surprisal")
plot(esurp_temp, pch = 16, col = "blue",
main = "EyeTR RTs and Surprisal")
# Fit MoTR-vs-surprisal correlation model.
# FIX: removed the trailing comma after seed=444 in both stan() calls —
# with the following arguments commented out it left an empty argument,
# which errors in R.
msurp_data = list(x=msurp_temp, N=nrow(msurp_temp))
fit_msurp = stan(
file="stan_models/stats_correlation.stan",
data=msurp_data,
iter=4000,
chains=4,
cores=8,
seed=444
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
# Drop the compiled DSO, then persist (single-argument paste0 removed)
fit_msurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_msurp, file = "motr_surp_cor.rds")
# Fit eye-tracking-vs-surprisal correlation model
esurp_data = list(x=esurp_temp, N=nrow(esurp_temp))
fit_esurp = stan(
file="stan_models/stats_correlation.stan",
data=esurp_data,
iter=4000,
chains=4,
cores=8,
seed=444
# control=list(adapt_delta=0.99),
# verbose = FALSE
)
fit_esurp@stanmodel@dso <- new("cxxdso")
saveRDS(fit_esurp, file = "eyetr_surp_cor.rds")
# Reload the saved statistics-correlation fits from disk
fit_mfreq = readRDS("./motr_freq_cor.rds")
fit_efreq = readRDS("./eyetr_freq_cor.rds")
fit_mlen = readRDS("./motr_len_cor.rds")
fit_elen = readRDS("./eyetr_len_cor.rds")
fit_msurp = readRDS("./motr_surp_cor.rds")
fit_esurp = readRDS("./eyetr_surp_cor.rds")
print('---------------------------- MoTR & Log Frequency --------------------------------------------')
[1] "---------------------------- MoTR & Log Frequency --------------------------------------------"
print(fit_mfreq)
Inference for Stan model: stats_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 206.16 0.02 1.93 202.44 204.86 206.17 207.47 209.88 8522 1
mu[2] 5.80 0.00 0.02 5.77 5.79 5.80 5.81 5.84 8983 1
sigma[1] 138.91 0.02 1.50 136.02 137.87 138.92 139.91 141.86 8278 1
sigma[2] 1.34 0.00 0.01 1.32 1.33 1.34 1.35 1.37 8274 1
nu 99.82 0.24 23.02 62.37 83.46 97.35 113.85 150.17 9319 1
rho -0.76 0.00 0.01 -0.78 -0.77 -0.76 -0.76 -0.75 8377 1
cov[1,1] 19299.02 4.59 417.70 18501.76 19006.99 19298.28 19575.75 20123.93 8268 1
cov[1,2] -141.93 0.04 3.34 -148.59 -144.18 -141.93 -139.65 -135.44 6864 1
cov[2,1] -141.93 0.04 3.34 -148.59 -144.18 -141.93 -139.65 -135.44 6864 1
cov[2,2] 1.80 0.00 0.04 1.73 1.78 1.80 1.83 1.87 8265 1
x_rand[1] 227.30 1.38 121.53 20.78 136.69 221.29 306.64 484.97 7799 1
x_rand[2] 5.66 0.01 1.26 3.09 4.82 5.70 6.53 8.03 8041 1
attempt 0.07 0.00 0.27 0.00 0.00 0.00 0.00 1.00 7926 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -36159.81 0.03 1.73 -36164.02 -36160.74 -36159.48 -36158.53 -36157.45 3770 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 15:18:27 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- EyeTR & Log Frequency --------------------------------------------')
[1] "---------------------------- EyeTR & Log Frequency --------------------------------------------"
print(fit_efreq)
Inference for Stan model: stats_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 129.58 0.01 0.95 127.71 128.96 129.59 130.21 131.45 7967 1
mu[2] 5.79 0.00 0.02 5.75 5.78 5.79 5.80 5.82 8371 1
sigma[1] 73.11 0.01 0.75 71.66 72.60 73.10 73.61 74.61 7440 1
sigma[2] 1.34 0.00 0.01 1.31 1.33 1.34 1.35 1.36 7478 1
nu 91.76 0.24 21.94 56.83 76.18 89.08 104.50 142.84 8551 1
rho -0.82 0.00 0.01 -0.83 -0.82 -0.82 -0.81 -0.81 8039 1
cov[1,1] 5345.65 1.28 110.19 5134.97 5270.29 5343.14 5418.78 5566.32 7432 1
cov[1,2] -80.04 0.02 1.70 -83.43 -81.17 -80.03 -78.89 -76.73 6431 1
cov[2,1] -80.04 0.02 1.70 -83.43 -81.17 -80.03 -78.89 -76.73 6431 1
cov[2,2] 1.79 0.00 0.04 1.72 1.77 1.79 1.81 1.86 7476 1
x_rand[1] 137.10 0.77 67.73 16.30 87.55 134.62 182.36 275.36 7640 1
x_rand[2] 5.68 0.01 1.27 3.13 4.82 5.68 6.55 8.11 7779 1
attempt 0.04 0.00 0.19 0.00 0.00 0.00 0.00 1.00 8076 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -32784.74 0.03 1.72 -32789.05 -32785.64 -32784.43 -32783.48 -32782.37 4038 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 15:44:55 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- MoTR & Length --------------------------------------------')
[1] "---------------------------- MoTR & Length --------------------------------------------"
print(fit_mlen)
Inference for Stan model: stats_correlation_len_normal.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 197.87 0.03 2.04 193.81 196.48 197.88 199.27 201.86 4675 1
mu[2] 4.40 0.00 0.04 4.33 4.38 4.40 4.43 4.48 4776 1
sigma[1] 136.99 0.03 1.75 133.57 135.79 136.99 138.19 140.43 4848 1
sigma[2] 2.51 0.00 0.03 2.45 2.49 2.51 2.53 2.58 4665 1
nu 17.03 0.04 2.84 12.51 15.05 16.64 18.61 23.71 4796 1
rho 0.89 0.00 0.00 0.88 0.88 0.89 0.89 0.89 5940 1
cov[1,1] 18768.69 6.88 479.04 17841.00 18438.93 18766.08 19096.47 19721.77 4845 1
cov[1,2] 305.06 0.12 7.91 289.81 299.65 305.00 310.39 320.87 4341 1
cov[2,1] 305.06 0.12 7.91 289.81 299.65 305.00 310.39 320.87 4341 1
cov[2,2] 6.31 0.00 0.17 5.98 6.19 6.30 6.42 6.64 4664 1
x_rand[1] 219.00 1.39 122.76 20.11 127.61 208.39 297.11 488.02 7751 1
x_rand[2] 5.26 0.03 2.36 1.00 4.00 5.00 7.00 10.00 7861 1
attempt 0.10 0.00 0.33 0.00 0.00 0.00 0.00 1.00 7797 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -38437.19 0.03 1.75 -38441.43 -38438.10 -38436.86 -38435.92 -38434.79 3210 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 19:35:18 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- EyeTR & Length --------------------------------------------')
[1] "---------------------------- EyeTR & Length --------------------------------------------"
print(fit_elen)
Inference for Stan model: stats_correlation_len_normal.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 125.47 0.01 1.05 123.48 124.75 125.46 126.18 127.53 5037 1
mu[2] 4.45 0.00 0.04 4.38 4.43 4.45 4.48 4.52 4809 1
sigma[1] 72.58 0.01 0.88 70.87 71.98 72.57 73.15 74.31 4816 1
sigma[2] 2.49 0.00 0.03 2.42 2.47 2.49 2.51 2.55 4786 1
nu 17.29 0.04 2.69 13.08 15.35 16.97 18.80 23.46 4894 1
rho 0.88 0.00 0.00 0.87 0.88 0.88 0.89 0.89 5800 1
cov[1,1] 5268.20 1.84 127.51 5022.34 5181.60 5266.78 5351.59 5522.02 4819 1
cov[1,2] 159.48 0.06 3.98 151.79 156.77 159.44 162.13 167.48 4232 1
cov[2,1] 159.48 0.06 3.98 151.79 156.77 159.44 162.13 167.48 4232 1
cov[2,2] 6.19 0.00 0.16 5.88 6.08 6.19 6.30 6.51 4790 1
x_rand[1] 135.19 0.75 67.87 18.34 85.18 132.06 179.13 278.49 8296 1
x_rand[2] 5.29 0.03 2.41 1.00 4.00 5.00 7.00 10.00 8133 1
attempt 0.06 0.00 0.24 0.00 0.00 0.00 0.00 1.00 7560 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -35403.80 0.03 1.76 -35408.14 -35404.73 -35403.49 -35402.51 -35401.38 3435 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 19:55:18 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- MoTR & Surprisal --------------------------------------------')
[1] "---------------------------- MoTR & Surprisal --------------------------------------------"
print(fit_msurp)
Inference for Stan model: stats_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 193.29 0.02 2.22 189.00 191.80 193.27 194.83 197.56 7883 1
mu[2] 6.02 0.00 0.08 5.86 5.97 6.02 6.08 6.19 8202 1
sigma[1] 135.56 0.02 1.87 132.03 134.29 135.53 136.83 139.29 6733 1
sigma[2] 5.10 0.00 0.08 4.95 5.05 5.10 5.15 5.25 6738 1
nu 11.72 0.02 1.35 9.45 10.76 11.58 12.56 14.76 6506 1
rho 0.57 0.00 0.01 0.54 0.56 0.57 0.58 0.60 7907 1
cov[1,1] 18380.00 6.19 508.20 17430.66 18032.66 18367.66 18723.49 19401.81 6740 1
cov[1,2] 396.36 0.20 15.81 365.40 385.79 396.30 407.09 427.00 6245 1
cov[2,1] 396.36 0.20 15.81 365.40 385.79 396.30 407.09 427.00 6245 1
cov[2,2] 25.98 0.01 0.78 24.50 25.45 25.98 26.51 27.56 6750 1
x_rand[1] 229.82 1.41 125.86 23.36 137.77 220.32 308.08 504.67 7929 1
x_rand[2] 7.63 0.05 4.51 0.66 4.30 7.12 10.37 17.97 7826 1
attempt 0.21 0.01 0.51 0.00 0.00 0.00 0.00 2.00 7854 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -43211.31 0.03 1.76 -43215.70 -43212.20 -43210.97 -43210.02 -43208.89 3913 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 20:47:52 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
[1] "---------------------------- EyeTR & Surprisal --------------------------------------------"
print(fit_esurp)
Inference for Stan model: stats_correlation.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 123.52 0.01 1.17 121.28 122.73 123.52 124.30 125.81 7205 1
mu[2] 6.14 0.00 0.08 5.97 6.08 6.13 6.19 6.30 7266 1
sigma[1] 72.79 0.01 0.95 70.95 72.15 72.77 73.43 74.64 6894 1
sigma[2] 5.16 0.00 0.07 5.01 5.11 5.16 5.21 5.30 5938 1
nu 14.77 0.03 1.97 11.49 13.38 14.56 15.97 19.18 5988 1
rho 0.63 0.00 0.01 0.61 0.62 0.63 0.64 0.66 8585 1
cov[1,1] 5299.28 1.66 138.13 5033.28 5205.63 5295.99 5392.41 5571.18 6894 1
cov[1,2] 237.27 0.11 8.26 221.19 231.62 237.24 242.84 253.80 6152 1
cov[2,1] 237.27 0.11 8.26 221.19 231.62 237.24 242.84 253.80 6152 1
cov[2,2] 26.59 0.01 0.77 25.10 26.07 26.58 27.11 28.14 5952 1
x_rand[1] 141.12 0.80 69.27 22.01 91.25 136.11 184.96 290.51 7508 1
x_rand[2] 7.68 0.05 4.55 0.67 4.31 7.15 10.45 17.96 7265 1
attempt 0.17 0.00 0.45 0.00 0.00 0.00 0.00 1.00 8132 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ -40032.38 0.03 1.76 -40036.66 -40033.33 -40032.04 -40031.08 -40029.99 3594 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 21:06:56 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# Convergence / posterior diagnostics for each correlation fit:
# stan_trace()  - trace plots (chain mixing),
# stan_dens()   - per-chain posterior densities (separate_chains = TRUE),
# stan_plot()   - posterior interval plot.
# MoTR & Log Freq
stan_trace(fit_mfreq)
stan_dens(fit_mfreq, separate_chains = TRUE)
stan_plot(fit_mfreq)
# EyeTR & Log Freq
stan_trace(fit_efreq)
stan_dens(fit_efreq, separate_chains = TRUE)
stan_plot(fit_efreq)
# MoTR & Len
stan_trace(fit_mlen)
stan_dens(fit_mlen, separate_chains = TRUE)
stan_plot(fit_mlen)
# EyeTR & Len
stan_trace(fit_elen)
stan_dens(fit_elen, separate_chains = TRUE)
stan_plot(fit_elen)
# MoTR & Surprisal
stan_trace(fit_msurp)
stan_dens(fit_msurp, separate_chains = TRUE)
stan_plot(fit_msurp)
# EyeTR & Surprisal
stan_trace(fit_esurp)
stan_dens(fit_esurp, separate_chains = TRUE)
stan_plot(fit_esurp)
print('---------------------------- MoTR & Log Freq --------------------------------------------')
[1] "---------------------------- MoTR & Log Freq --------------------------------------------"
# Posterior summary for the MoTR ~ log-frequency correlation rho:
# posterior mean, 95% HPD interval (coda), and equal-tailed 95% CrI.
rho_mfreq = as.numeric(extract(fit_mfreq, "rho")[[1]])
mean_rho = mean(rho_mfreq)  # renamed from `mean` so base::mean is not shadowed
crI = quantile(rho_mfreq, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_mfreq), prob=0.95)  # renamed from `hpd99`: prob is 0.95, not 0.99
# sep="" moved to the end of the call; printed text is unchanged
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
Mean: -0.7613
HPD: [-0.776, -0.7461]
crI: [-0.7762, -0.7462]
print('---------------------------- EyeTR & Log Freq --------------------------------------------')
[1] "---------------------------- EyeTR & Log Freq --------------------------------------------"
# Posterior summary for the EyeTR ~ log-frequency correlation rho:
# posterior mean, 95% HPD interval (coda), and equal-tailed 95% CrI.
rho_efreq = as.numeric(extract(fit_efreq, "rho")[[1]])
mean_rho = mean(rho_efreq)  # renamed from `mean` so base::mean is not shadowed
crI = quantile(rho_efreq, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_efreq), prob=0.95)  # renamed from `hpd99`: prob is 0.95, not 0.99
# sep="" moved to the end of the call; printed text is unchanged
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
Mean: -0.8182
HPD: [-0.8297, -0.8065]
crI: [-0.8295, -0.8062]
print('---------------------------- MoTR & Length --------------------------------------------')
[1] "---------------------------- MoTR & Length --------------------------------------------"
# Posterior summary for the MoTR ~ word-length correlation rho:
# posterior mean, 95% HPD interval (coda), and equal-tailed 95% CrI.
rho_mlen = as.numeric(extract(fit_mlen, "rho")[[1]])
mean_rho = mean(rho_mlen)  # renamed from `mean` so base::mean is not shadowed
crI = quantile(rho_mlen, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_mlen), prob=0.95)  # renamed from `hpd99`: prob is 0.95, not 0.99
# sep="" moved to the end of the call; printed text is unchanged
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
Mean: 0.8867
HPD: [0.8784, 0.8943]
crI: [0.8784, 0.8943]
print('---------------------------- EyeTR & Length --------------------------------------------')
[1] "---------------------------- EyeTR & Length --------------------------------------------"
# Posterior summary for the EyeTR ~ word-length correlation rho:
# posterior mean, 95% HPD interval (coda), and equal-tailed 95% CrI.
rho_elen = as.numeric(extract(fit_elen, "rho")[[1]])
mean_rho = mean(rho_elen)  # renamed from `mean` so base::mean is not shadowed
crI = quantile(rho_elen, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_elen), prob=0.95)  # renamed from `hpd99`: prob is 0.95, not 0.99
# sep="" moved to the end of the call; printed text is unchanged
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
Mean: 0.883
HPD: [0.8743, 0.8908]
crI: [0.8746, 0.8911]
print('---------------------------- MoTR & Surprisal --------------------------------------------')
[1] "---------------------------- MoTR & Surprisal --------------------------------------------"
# Posterior summary for the MoTR ~ surprisal correlation rho:
# posterior mean, 95% HPD interval (coda), and equal-tailed 95% CrI.
rho_msurp = as.numeric(extract(fit_msurp, "rho")[[1]])
mean_rho = mean(rho_msurp)  # renamed from `mean` so base::mean is not shadowed
crI = quantile(rho_msurp, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_msurp), prob=0.95)  # renamed from `hpd99`: prob is 0.95, not 0.99
# sep="" moved to the end of the call; printed text is unchanged
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
Mean: 0.5735
HPD: [0.5447, 0.6033]
crI: [0.5432, 0.602]
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
[1] "---------------------------- EyeTR & Surprisal --------------------------------------------"
# Posterior summary for the EyeTR ~ surprisal correlation rho:
# posterior mean, 95% HPD interval (coda), and equal-tailed 95% CrI.
rho_esurp = as.numeric(extract(fit_esurp, "rho")[[1]])
mean_rho = mean(rho_esurp)  # renamed from `mean` so base::mean is not shadowed
crI = quantile(rho_esurp, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_esurp), prob=0.95)  # renamed from `hpd99`: prob is 0.95, not 0.99
# sep="" moved to the end; NOTE: this one had no trailing "\n" originally -- preserved
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", "\ncrI: [", crI[1], ", ", crI[2], "]", sep="")
Mean: 0.632
HPD: [0.6075, 0.6574]
crI: [0.6066, 0.6569]
print('---------------------------- MoTR & Log Frequency--------------------------------------------')
[1] "---------------------------- MoTR & Log Frequency--------------------------------------------"
# Posterior predictive check: MoTR value vs. log frequency.
mfreq_rand <- extract(fit_mfreq, "x_rand")[[1]]
# create a blank plot first with appropriate limits ('type = "n"' keeps it blank)
plot(1, 1, xlim=c(0, 800), ylim=c(0, 12), type="n",
xlab = "MoTR value", ylab = "Log Frequency", main = "Gaze Duration")
# posterior predictive draws (black)
points(mfreq_rand[,1], mfreq_rand[,2], col = "black", pch = 16)
# observed data (red) -- mfreq_temp is built earlier in the script
points(mfreq_temp, pch=16, col="red")
# posterior ellipses; TRUE/FALSE spelled out (T/F are reassignable)
dataEllipse(mfreq_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(mfreq_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- EyeTR & Log Frequency--------------------------------------------')
[1] "---------------------------- EyeTR & Log Frequency--------------------------------------------"
# Posterior predictive check: eye-tracking value vs. log frequency.
efreq_rand <- extract(fit_efreq, "x_rand")[[1]]
# create a blank plot first with appropriate limits ('type = "n"' keeps it blank)
plot(1, 1, xlim=c(0, 500), ylim=c(0, 12), type="n",
xlab = "Eye tracking value", ylab = "Log Frequency", main = "Gaze Duration")
# posterior predictive draws (black)
points(efreq_rand[,1], efreq_rand[,2], col = "black", pch = 16)
# observed data (red) -- efreq_temp is built earlier in the script
points(efreq_temp, pch=16, col="red")
# posterior ellipses; TRUE/FALSE spelled out (T/F are reassignable)
dataEllipse(efreq_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(efreq_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- MoTR & Length --------------------------------------------')
[1] "---------------------------- MoTR & Length --------------------------------------------"
# Posterior predictive check: MoTR value vs. word length.
mlen_rand <- extract(fit_mlen, "x_rand")[[1]]
# mlen_rand
# create a blank plot first with appropriate limits ('type = "n"' keeps it blank)
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
xlab = "MoTR value", ylab = "Word Length", main = "Gaze Duration")
# posterior predictive draws (black)
points(mlen_rand[,1], mlen_rand[,2], col = "black", pch = 16)
# observed data (red) -- mlen_temp is built earlier in the script
points(mlen_temp, pch=16, col="red")
# posterior ellipses; TRUE/FALSE spelled out (T/F are reassignable)
dataEllipse(mlen_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(mlen_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- EyeTR & Length --------------------------------------------')
[1] "---------------------------- EyeTR & Length --------------------------------------------"
# Posterior predictive check: eye-tracking value vs. word length.
elen_rand <- extract(fit_elen, "x_rand")[[1]]
# elen_rand
# create a blank plot first with appropriate limits ('type = "n"' keeps it blank)
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
xlab = "EyeTR value", ylab = "Word Length", main = "Gaze Duration")
# posterior predictive draws (black)
points(elen_rand[,1], elen_rand[,2], col = "black", pch = 16)
# observed data (red) -- elen_temp is built earlier in the script
points(elen_temp, pch=16, col="red")
# posterior ellipses; TRUE/FALSE spelled out (T/F are reassignable)
dataEllipse(elen_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(elen_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- MoTR & Surprisal --------------------------------------------')
[1] "---------------------------- MoTR & Surprisal --------------------------------------------"
# Posterior predictive check: MoTR value vs. word surprisal.
msurp_rand <- extract(fit_msurp, "x_rand")[[1]]
# create a blank plot first with appropriate limits ('type = "n"' keeps it blank)
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
xlab = "MoTR value", ylab = "Word Surprisal", main = "Gaze Duration")
# posterior predictive draws (black); stray space before "[" removed
points(msurp_rand[,1], msurp_rand[,2], col = "black", pch = 16)
# observed data (red) -- msurp_temp is built earlier in the script
points(msurp_temp, pch=16, col="red")
# posterior ellipses; TRUE/FALSE spelled out (T/F are reassignable)
dataEllipse(msurp_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(msurp_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- EyeTR & Surprisal --------------------------------------------')
[1] "---------------------------- EyeTR & Surprisal --------------------------------------------"
# Posterior predictive check: eye-tracking value vs. word surprisal.
esurp_rand <- extract(fit_esurp, "x_rand")[[1]]
# create a blank plot first with appropriate limits ('type = "n"' keeps it blank)
plot(1, 1, xlim=c(0, 800), ylim=c(0, 20), type="n",
xlab = "EyeTR value", ylab = "Word Surprisal", main = "Gaze Duration")
# posterior predictive draws (black); stray space before "[" removed
points(esurp_rand[,1], esurp_rand[,2], col = "black", pch = 16)
# observed data (red) -- esurp_temp is built earlier in the script
points(esurp_temp, pch=16, col="red")
# posterior ellipses; TRUE/FALSE spelled out (T/F are reassignable)
dataEllipse(esurp_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(esurp_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print("EyeTR vs. EyeTR First Pass Regression Prob. < 0.3 ")  # typo "Fisrt" -> "First"
[1] "EyeTR vs. EyeTR First Pass Regression Prob. < 0.3 "
# Eye-tracking vs. eye-tracking first-pass regression probability (FPReg).
# Build the full data set ONCE, then derive the low/high subsets by filtering;
# the original repeated the identical 7-step pipeline three times.
ereg_df = provo_eyetr_grouped_df %>% filter(metric == "FPReg") %>% distinct() %>% #group_by(text_id, metric, measure) %>%
# summarize(value = mean(value)) %>%
filter(!(row_number() %in% c(443, 444, 445, 446))) %>%
spread(measure, value) %>%
# smoothing, if includes 0s
mutate(eyetr_value_1 = pmax(value_1, 1e-5),
eyetr_value_2 = pmax(value_2, 1e-5))
# Subsets for low (< 0.3) and high (>= 0.3) first-pass regression probability
ereg_df_low = ereg_df %>% filter(eyetr_value_1 < 0.3)
# View(ereg_df_low)
ereg_df_high = ereg_df %>% filter(eyetr_value_1 >= 0.3)
# View(ereg_df_high)
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$estimate)
cor
0.741
print(cor.test(ereg_df$eyetr_value_1, ereg_df$eyetr_value_2)$p.value)
[1] 0
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$estimate)
cor
0.4586
print(cor.test(ereg_df_low$eyetr_value_1, ereg_df_low$eyetr_value_2)$p.value)
[1] 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000293
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$estimate)
cor
0.6653
print(cor.test(ereg_df_high$eyetr_value_1, ereg_df_high$eyetr_value_2)$p.value)
[1] 0.000000000000000000000000000000000000000002144
# View(egd_df)
# Density of the two smoothed FPReg measures, one facet per measure.
# NOTE(review): gather() is superseded by pivot_longer(); columns selected
# positionally (5:6), which assumes a fixed column order -- verify upstream.
ereg_df %>%
gather(measure, value, 5:6) %>%
ggplot(aes(x = value)) +
geom_density() +
facet_wrap(~measure, scales = "free") +
theme_bw() +
scale_fill_brewer(palette = "Set1")
# Numeric matrices (NA rows dropped) for the Stan correlation models below.
ereg_temp <- ereg_df[c("eyetr_value_1", "eyetr_value_2")] %>%
drop_na() %>%
data.matrix()
ereg_temp_low <- ereg_df_low[c("eyetr_value_1", "eyetr_value_2")] %>%
drop_na() %>%
data.matrix()
ereg_temp_high <- ereg_df_high[c("eyetr_value_1", "eyetr_value_2")] %>%
drop_na() %>%
data.matrix()
# Set up the plotting area with two side-by-side plots
# (actually three panels: all data / < 0.3 / >= 0.3)
par(mfrow = c(1, 3))
# Plot the first data matrix gd_temp
plot(ereg_temp, pch = 16, col = "blue",
main = "FPReg all data Not Log-Transformed")
plot(ereg_temp_low, pch = 16, col = "blue",
main = "FPReg < 0.3 Not Log-Transformed")
plot(ereg_temp_high, pch = 16, col = "blue",
main = "FPReg > 0.3 Not Log-Transformed")
# -------fit model eyetr vs. eyetr FPReg <0.3 & >=0.3 ----------
# Fit the bivariate-normal correlation model to the FULL FPReg data.
# NOTE(review): only ereg_temp is fitted here; the low/high fits are loaded
# from .rds files further down -- confirm they were produced the same way.
reg_data = list(x=ereg_temp, N=nrow(ereg_temp))
fit_reg = stan(
# file="stan_models/bivariate_beta_correlation_reg.stan",
file = "stan_models/bivariate_normal_reg.stan",
data=reg_data,
iter=4000,
chains=4,
cores=4,
seed=444,
# control=list(adapt_delta=0.99),
verbose = FALSE
)
# Save the model
# Strip the compiled dynamic shared object before saving so the .rds is
# portable across machines (it is rebuilt on demand by rstan).
fit_reg@stanmodel@dso <- new("cxxdso")
saveRDS(fit_reg, file = paste0("eyetr_eyetr_FPReg_cor_all_data.rds"))
# fit_ereg_all = readRDS("./eyetr_eyetr_FPReg_cor_all_data.rds")
# NOTE(review): the file loaded below ("././eyetr_eyetr_FPReg_cor.rds") is NOT
# the file saved above ("eyetr_eyetr_FPReg_cor_all_data.rds"); the "././"
# prefix is redundant but harmless. Confirm which fit is intended here.
fit_ereg_all = readRDS("././eyetr_eyetr_FPReg_cor.rds")
fit_ereg_low = readRDS("./eyetr_eyetr_FPReg_cor_00-03.rds")
fit_ereg_high = readRDS("./eyetr_eyetr_FPReg_cor_03-1.rds")
print('---------------------------- First Pass Regression Prob. all data --------------------------------------------')
[1] "---------------------------- First Pass Regression Prob. all data --------------------------------------------"
print(fit_ereg_all)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.11 0.00 0.00 0.10 0.10 0.11 0.11 0.11 7352 1
mu[2] 0.11 0.00 0.00 0.10 0.11 0.11 0.11 0.11 6993 1
sigma[1] 0.11 0.00 0.00 0.10 0.11 0.11 0.11 0.11 5584 1
sigma[2] 0.10 0.00 0.00 0.10 0.10 0.10 0.10 0.11 5441 1
nu 3.16 0.00 0.13 2.92 3.07 3.16 3.24 3.42 6221 1
rho 0.73 0.00 0.01 0.71 0.73 0.73 0.74 0.76 7209 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 5581 1
cov[1,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 4877 1
cov[2,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 4877 1
cov[2,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 5442 1
x_rand[1] 0.17 0.00 0.14 0.01 0.08 0.14 0.22 0.47 7543 1
x_rand[2] 0.17 0.00 0.14 0.01 0.08 0.14 0.21 0.46 8201 1
attempt 0.37 0.01 0.70 0.00 0.00 0.00 1.00 2.00 7941 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 11696.50 0.03 1.76 11692.31 11695.54 11696.82 11697.81 11698.90 3562 1
Samples were drawn using NUTS(diag_e) at Sun Jul 23 01:21:18 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.< 0.3--------------------------------------------"
print(fit_ereg_low)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.11 0.00 0.00 0.11 0.11 0.11 0.11 0.12 7590 1
mu[2] 0.12 0.00 0.00 0.12 0.12 0.12 0.12 0.12 6828 1
sigma[1] 0.08 0.00 0.00 0.07 0.08 0.08 0.08 0.08 7489 1
sigma[2] 0.09 0.00 0.00 0.08 0.08 0.09 0.09 0.09 6449 1
nu 24.44 0.08 6.00 16.09 20.27 23.37 27.29 39.52 5520 1
rho 0.47 0.00 0.02 0.44 0.46 0.47 0.49 0.51 8485 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 7484 1
cov[1,2] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6928 1
cov[2,1] 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 6928 1
cov[2,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.01 6439 1
x_rand[1] 0.13 0.00 0.07 0.01 0.08 0.12 0.17 0.28 7848 1
x_rand[2] 0.14 0.00 0.08 0.01 0.08 0.13 0.19 0.31 7951 1
attempt 0.17 0.00 0.44 0.00 0.00 0.00 0.00 1.00 7714 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 7720.01 0.03 1.74 7715.76 7719.08 7720.31 7721.28 7722.38 3981 1
Samples were drawn using NUTS(diag_e) at Sun Aug 6 00:32:16 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
print('---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression Prob.>= 0.3--------------------------------------------"
print(fit_ereg_high)
Inference for Stan model: bivariate_normal_reg.
4 chains, each with iter=4000; warmup=2000; thin=1;
post-warmup draws per chain=2000, total post-warmup draws=8000.
mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
mu[1] 0.44 0.00 0.01 0.43 0.44 0.44 0.45 0.46 5432 1
mu[2] 0.37 0.00 0.01 0.35 0.36 0.37 0.38 0.39 6127 1
sigma[1] 0.12 0.00 0.01 0.11 0.11 0.12 0.12 0.13 5531 1
sigma[2] 0.16 0.00 0.01 0.15 0.16 0.16 0.17 0.18 5479 1
nu 28.02 0.17 12.90 11.34 18.95 25.20 34.27 60.04 6081 1
rho 0.67 0.00 0.03 0.60 0.65 0.67 0.69 0.73 6652 1
cov[1,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 5539 1
cov[1,2] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 4763 1
cov[2,1] 0.01 0.00 0.00 0.01 0.01 0.01 0.01 0.02 4763 1
cov[2,2] 0.03 0.00 0.00 0.02 0.02 0.03 0.03 0.03 5491 1
x_rand[1] 0.44 0.00 0.12 0.21 0.36 0.45 0.53 0.68 7922 1
x_rand[2] 0.38 0.00 0.16 0.08 0.26 0.37 0.48 0.71 8307 1
attempt 0.02 0.00 0.13 0.00 0.00 0.00 0.00 0.00 8124 1
max_attempts 10.00 NaN 0.00 10.00 10.00 10.00 10.00 10.00 NaN NaN
lp__ 793.27 0.03 1.79 788.73 792.39 793.63 794.56 795.67 3841 1
Samples were drawn using NUTS(diag_e) at Sun Aug 6 00:36:26 2023.
For each parameter, n_eff is a crude measure of effective sample size,
and Rhat is the potential scale reduction factor on split chains (at
convergence, Rhat=1).
# # FPReg all data
stan_trace(fit_ereg_all)
'pars' not specified. Showing first 10 parameters by default.
stan_dens(fit_ereg_all, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.
stan_plot(fit_ereg_all)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)
# # FPReg < 0.3
stan_trace(fit_ereg_low)
'pars' not specified. Showing first 10 parameters by default.
stan_dens(fit_ereg_low, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.
stan_plot(fit_ereg_low)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)
# FPReg >= 0.3
stan_trace(fit_ereg_high)
'pars' not specified. Showing first 10 parameters by default.
stan_dens(fit_ereg_high, separate_chains = TRUE)
'pars' not specified. Showing first 10 parameters by default.
stan_plot(fit_ereg_high)
'pars' not specified. Showing first 10 parameters by default.
ci_level: 0.8 (80% intervals)
outer_level: 0.95 (95% intervals)
print('---------------------------- First Pass Regression all data--------------------------------------------')
[1] "---------------------------- First Pass Regression all data--------------------------------------------"
# Posterior summary for the FPReg (all data) correlation rho:
# posterior mean, 95% HPD interval (coda), and equal-tailed 95% CrI.
rho_ereg_all = as.numeric(extract(fit_ereg_all, "rho")[[1]])
mean_rho = mean(rho_ereg_all)  # renamed from `mean` so base::mean is not shadowed
crI = quantile(rho_ereg_all, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_ereg_all), prob=0.95)  # renamed from `hpd99`: prob is 0.95, not 0.99
# sep="" moved to the end of the call; printed text is unchanged
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
Mean: 0.7339
HPD: [0.7128, 0.7557]
crI: [0.7119, 0.755]
print('---------------------------- First Pass Regression < 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3--------------------------------------------"
# Posterior summary for the FPReg (< 0.3) correlation rho:
# posterior mean, 95% HPD interval (coda), and equal-tailed 95% CrI.
rho_ereg_low = as.numeric(extract(fit_ereg_low, "rho")[[1]])
mean_rho = mean(rho_ereg_low)  # renamed from `mean` so base::mean is not shadowed
crI = quantile(rho_ereg_low, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_ereg_low), prob=0.95)  # renamed from `hpd99`: prob is 0.95, not 0.99
# sep="" moved to the end of the call; printed text is unchanged
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
Mean: 0.4739
HPD: [0.4393, 0.5071]
crI: [0.4395, 0.5073]
print('---------------------------- First Pass Regression >= 0.3--------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3--------------------------------------------"
# Posterior summary for the FPReg (>= 0.3) correlation rho:
# posterior mean, 95% HPD interval (coda), and equal-tailed 95% CrI.
rho_ereg_high = as.numeric(extract(fit_ereg_high, "rho")[[1]])
mean_rho = mean(rho_ereg_high)  # renamed from `mean` so base::mean is not shadowed
crI = quantile(rho_ereg_high, c(.025, .975))
hpd95 = HPDinterval(as.mcmc(rho_ereg_high), prob=0.95)  # renamed from `hpd99`: prob is 0.95, not 0.99
# sep="" moved to the end of the call; printed text is unchanged
cat("Mean: ", mean_rho, "\nHPD: [", hpd95[,"lower"], ", ", hpd95[,"upper"], "]", "\ncrI: [", crI[1], ", ", crI[2], "]\n", sep="")
Mean: 0.6711
HPD: [0.607, 0.7342]
crI: [0.6033, 0.7315]
print('---------------------------- First Pass Regression all data --------------------------------------------')
[1] "---------------------------- First Pass Regression all data --------------------------------------------"
# Posterior predictive check: FPReg, all data (EyeTR vs. MoTR values).
eallreg_rand <- extract(fit_ereg_all, "x_rand")[[1]]
# create a blank plot first with appropriate limits ('type = "n"' keeps it blank)
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
# posterior predictive draws (black)
points(eallreg_rand[,1], eallreg_rand[,2], col = "black", pch = 16)
# observed data (red)
points(ereg_temp, pch=16, col="red")
# posterior ellipses; TRUE/FALSE spelled out (T/F are reassignable)
dataEllipse(eallreg_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(eallreg_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- First Pass Regression < 0.3 --------------------------------------------')
[1] "---------------------------- First Pass Regression < 0.3 --------------------------------------------"
# Posterior predictive check: FPReg < 0.3 subset (EyeTR vs. MoTR values).
elowreg_rand <- extract(fit_ereg_low, "x_rand")[[1]]
# print(elowreg_rand)
# create a blank plot first with appropriate limits ('type = "n"' keeps it blank)
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
# posterior predictive draws (black)
points(elowreg_rand[,1], elowreg_rand[,2], col = "black", pch = 16)
# observed data (red)
points(ereg_temp_low, pch=16, col="red")
# posterior ellipses; TRUE/FALSE spelled out (T/F are reassignable)
dataEllipse(elowreg_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(elowreg_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")
print('---------------------------- First Pass Regression >= 0.3 --------------------------------------------')
[1] "---------------------------- First Pass Regression >= 0.3 --------------------------------------------"
# Posterior predictive check: FPReg >= 0.3 subset (EyeTR vs. MoTR values).
# Subsample 900 predictive draws so the plot is not overcrowded
# (RNG seed is set once at the top of the script).
ehighreg_rand_samples <- extract(fit_ereg_high, "x_rand")[[1]]
# print(mhighreg_rand_samples)
selected_indices <- sample(seq_len(nrow(ehighreg_rand_samples)), 900)  # seq_len over 1:n
ehighreg_rand <- ehighreg_rand_samples[selected_indices, ]
# mhighreg_rand <- extract(fit_mreg_high, "x_rand")[[1]]
# create a blank plot first with appropriate limits ('type = "n"' keeps it blank)
plot(1, 1, xlim=c(0, 1), ylim=c(0, 1), type="n",
xlab = "Eye tracking value", ylab = "MoTR value", main = "FPReg")
# posterior predictive draws (black)
points(ehighreg_rand[,1], ehighreg_rand[,2], col = "black", pch = 16)
# observed data (red)
points(ereg_temp_high, pch=16, col="red")
# posterior ellipses; TRUE/FALSE spelled out (T/F are reassignable)
dataEllipse(ehighreg_rand, levels = c(0.5, 0.75), fill=TRUE, plot.points = FALSE, col="orange")
dataEllipse(ehighreg_rand, levels = c(0.95, 0.99), fill=TRUE, plot.points = FALSE, col="blue")